diff options
Diffstat (limited to 'crypto/bn')
96 files changed, 9938 insertions, 16925 deletions
diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile deleted file mode 100644 index 20e8ef0a28d7..000000000000 --- a/crypto/bn/Makefile +++ /dev/null @@ -1,389 +0,0 @@ -# -# OpenSSL/crypto/bn/Makefile -# - -DIR= bn -TOP= ../.. -CC= cc -CPP= $(CC) -E -INCLUDES= -I.. -I$(TOP) -I../../include -CFLAG=-g -MAKEFILE= Makefile -AR= ar r - -BN_ASM= bn_asm.o - -CFLAGS= $(INCLUDES) $(CFLAG) -ASFLAGS= $(INCLUDES) $(ASFLAG) -AFLAGS= $(ASFLAGS) - -GENERAL=Makefile -TEST=bntest.c exptest.c -APPS= - -LIB=$(TOP)/libcrypto.a -LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \ - bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ - bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \ - bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ - bn_depr.c bn_const.c bn_x931p.c - -LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \ - bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ - bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \ - bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \ - bn_depr.o bn_const.o bn_x931p.o - -SRC= $(LIBSRC) - -EXHEADER= bn.h -HEADER= bn_lcl.h bn_prime.h $(EXHEADER) - -ALL= $(GENERAL) $(SRC) $(HEADER) - -top: - (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) - -all: lib - -bn_prime.h: bn_prime.pl - $(PERL) bn_prime.pl >bn_prime.h - -divtest: divtest.c ../../libcrypto.a - cc -I../../include divtest.c -o divtest ../../libcrypto.a - -bnbug: bnbug.c ../../libcrypto.a top - cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a - -lib: $(LIBOBJ) - $(AR) $(LIB) $(LIBOBJ) - $(RANLIB) $(LIB) || echo Never mind. - @touch lib - -bn-586.s: asm/bn-586.pl ../perlasm/x86asm.pl - $(PERL) asm/bn-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -co-586.s: asm/co-586.pl ../perlasm/x86asm.pl - $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl - $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -x86-gf2m.s: asm/x86-gf2m.pl ../perlasm/x86asm.pl - $(PERL) asm/x86-gf2m.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ - -sparcv8.o: asm/sparcv8.S - $(CC) $(CFLAGS) -c asm/sparcv8.S -bn-sparcv9.o: asm/sparcv8plus.S - $(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S -sparcv9a-mont.s: asm/sparcv9a-mont.pl - $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ -sparcv9-mont.s: asm/sparcv9-mont.pl - $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ -vis3-mont.s: asm/vis3-mont.pl - $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ -sparct4-mont.S: asm/sparct4-mont.pl - $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ -sparcv9-gf2m.S: asm/sparcv9-gf2m.pl - $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ - -bn-mips3.o: asm/mips3.s - @if [ "$(CC)" = "gcc" ]; then \ - ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \ - as -$$ABI -O -o $@ asm/mips3.s; \ - else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi - -bn-mips.s: asm/mips.pl - $(PERL) asm/mips.pl $(PERLASM_SCHEME) $@ -mips-mont.s: asm/mips-mont.pl - $(PERL) asm/mips-mont.pl $(PERLASM_SCHEME) $@ - -bn-s390x.o: asm/s390x.S - $(CC) $(CFLAGS) -c -o $@ asm/s390x.S -s390x-gf2m.s: asm/s390x-gf2m.pl - $(PERL) asm/s390x-gf2m.pl $(PERLASM_SCHEME) $@ - -x86_64-gcc.o: asm/x86_64-gcc.c - $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c -x86_64-mont.s: asm/x86_64-mont.pl - $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@ -x86_64-mont5.s: asm/x86_64-mont5.pl - $(PERL) asm/x86_64-mont5.pl $(PERLASM_SCHEME) > $@ -x86_64-gf2m.s: asm/x86_64-gf2m.pl - $(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@ -rsaz-x86_64.s: asm/rsaz-x86_64.pl - $(PERL) asm/rsaz-x86_64.pl $(PERLASM_SCHEME) > $@ -rsaz-avx2.s: asm/rsaz-avx2.pl - $(PERL) asm/rsaz-avx2.pl $(PERLASM_SCHEME) > $@ - -bn-ia64.s: asm/ia64.S - $(CC) $(CFLAGS) -E asm/ia64.S > $@ -ia64-mont.s: asm/ia64-mont.pl - $(PERL) asm/ia64-mont.pl $@ $(CFLAGS) - -# GNU assembler fails to compile PA-RISC2 modules, insist on calling -# vendor assembler... -pa-risc2W.o: asm/pa-risc2W.s - /usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s -pa-risc2.o: asm/pa-risc2.s - /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s -parisc-mont.s: asm/parisc-mont.pl - $(PERL) asm/parisc-mont.pl $(PERLASM_SCHEME) $@ - -# ppc - AIX, Linux, MacOS X... -bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@ -ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ -ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@ - -alpha-mont.s: asm/alpha-mont.pl - (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ - $(PERL) asm/alpha-mont.pl > $$preproc && \ - $(CC) -E -P $$preproc > $@ && rm $$preproc) - -# GNU make "catch all" -%-mont.S: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@ -%-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@ - -armv4-mont.o: armv4-mont.S -armv4-gf2m.o: armv4-gf2m.S - -files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO - -links: - @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) - @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) - @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) - -install: - @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... - @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ - do \ - (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ - chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ - done; - -exptest: - rm -f exptest - gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a - -div: - rm -f a.out - gcc -I.. -g div.c ../../libcrypto.a - -tags: - ctags $(SRC) - -tests: - -lint: - lint -DLINT $(INCLUDES) $(SRC)>fluff - -update: bn_prime.h depend - -depend: - @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) - -dclean: - $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new - mv -f Makefile.new $(MAKEFILE) - -clean: - rm -f *.s *.S *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff - -# DO NOT DELETE THIS LINE -- make depend depends on it. - -bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_add.c bn_lcl.h -bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_asm.c bn_lcl.h -bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h -bn_const.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -bn_const.o: ../../include/openssl/opensslconf.h -bn_const.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_const.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_const.o: ../../include/openssl/symhacks.h bn.h bn_const.c -bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_ctx.c bn_lcl.h -bn_depr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_depr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_depr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_depr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_depr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_depr.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h -bn_depr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_depr.o: ../cryptlib.h bn_depr.c bn_lcl.h -bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_div.c bn_lcl.h -bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h -bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -bn_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h -bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_err.o: bn_err.c -bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_exp.o: ../../include/openssl/symhacks.h ../constant_time_locl.h -bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h rsaz_exp.h -bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp2.c bn_lcl.h -bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gcd.c bn_lcl.h -bn_gf2m.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_gf2m.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_gf2m.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_gf2m.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_gf2m.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_gf2m.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_gf2m.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gf2m.c bn_lcl.h -bn_kron.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_kron.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_kron.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_kron.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_kron.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_kron.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_kron.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_kron.c bn_lcl.h -bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_lib.c -bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_mod.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_mod.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mod.c -bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mont.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mont.c -bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mpi.c -bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mul.c -bn_nist.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_nist.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_nist.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c -bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_prime.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_prime.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h -bn_prime.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.c bn_prime.h -bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_print.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_print.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_print.c -bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_rand.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h -bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -bn_rand.o: ../cryptlib.h bn_lcl.h bn_rand.c -bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_recp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_recp.c -bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_shift.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_shift.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_shift.c -bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqr.c -bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_sqrt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_sqrt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqrt.c -bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c -bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/crypto.h -bn_x931p.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h -bn_x931p.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_x931p.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_x931p.o: ../../include/openssl/symhacks.h bn_x931p.c diff --git a/crypto/bn/README.pod b/crypto/bn/README.pod new file mode 100644 index 000000000000..706a140342cd --- /dev/null +++ b/crypto/bn/README.pod @@ -0,0 +1,241 @@ +=pod + +=head1 NAME + +bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words, +bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8, +bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal, +bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive, +bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive, +bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top, +bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM +library internal functions + +=head1 SYNOPSIS + + #include <openssl/bn.h> + + BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); + BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, + BN_ULONG w); + void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num); + BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); + BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, + int num); + BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, + int num); + + void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); + void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); + void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a); + void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a); + + int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n); + + void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, + int nb); + void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); + void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, + int dna, int dnb, BN_ULONG *tmp); + void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, + int n, int tna, int tnb, BN_ULONG *tmp); + void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, + int n2, BN_ULONG *tmp); + + void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp); + void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp); + + void mul(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c); + void mul_add(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c); + void sqr(BN_ULONG r0, BN_ULONG r1, BN_ULONG a); + + BIGNUM *bn_expand(BIGNUM *a, int bits); + BIGNUM *bn_wexpand(BIGNUM *a, int n); + BIGNUM *bn_expand2(BIGNUM *a, int n); + void bn_fix_top(BIGNUM *a); + + void bn_check_top(BIGNUM *a); + void bn_print(BIGNUM *a); + void bn_dump(BN_ULONG *d, int n); + void bn_set_max(BIGNUM *a); + void bn_set_high(BIGNUM *r, BIGNUM *a, int n); + void bn_set_low(BIGNUM *r, BIGNUM *a, int n); + +=head1 DESCRIPTION + +This page documents the internal functions used by the OpenSSL +B<BIGNUM> implementation. They are described here to facilitate +debugging and extending the library. They are I<not> to be used by +applications. + +=head2 The BIGNUM structure + + typedef struct bignum_st BIGNUM; + + struct bignum_st + { + BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ + int top; /* Index of last used d +1. */ + /* The next are internal book keeping for bn_expand. */ + int dmax; /* Size of the d array. */ + int neg; /* one if the number is negative */ + int flags; + }; + + +The integer value is stored in B<d>, a malloc()ed array of words (B<BN_ULONG>), +least significant word first. A B<BN_ULONG> can be either 16, 32 or 64 bits +in size, depending on the 'number of bits' (B<BITS2>) specified in +C<openssl/bn.h>. + +B<dmax> is the size of the B<d> array that has been allocated. B<top> +is the number of words being used, so for a value of 4, bn.d[0]=4 and +bn.top=1. B<neg> is 1 if the number is negative. When a B<BIGNUM> is +B<0>, the B<d> field can be B<NULL> and B<top> == B<0>. + +B<flags> is a bit field of flags which are defined in C<openssl/bn.h>. The +flags begin with B<BN_FLG_>. The macros BN_set_flags(b, n) and +BN_get_flags(b, n) exist to enable or fetch flag(s) B<n> from B<BIGNUM> +structure B<b>. + +Various routines in this library require the use of temporary +B<BIGNUM> variables during their execution. Since dynamic memory +allocation to create B<BIGNUM>s is rather expensive when used in +conjunction with repeated subroutine calls, the B<BN_CTX> structure is +used. This structure contains B<BN_CTX_NUM> B<BIGNUM>s, see +L<BN_CTX_start(3)>. + +=head2 Low-level arithmetic operations + +These functions are implemented in C and for several platforms in +assembly language: + +bn_mul_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num> word +arrays B<rp> and B<ap>. It computes B<ap> * B<w>, places the result +in B<rp>, and returns the high word (carry). + +bn_mul_add_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num> +word arrays B<rp> and B<ap>. It computes B<ap> * B<w> + B<rp>, places +the result in B<rp>, and returns the high word (carry). + +bn_sqr_words(B<rp>, B<ap>, B<n>) operates on the B<num> word array +B<ap> and the 2*B<num> word array B<ap>. It computes B<ap> * B<ap> +word-wise, and places the low and high bytes of the result in B<rp>. + +bn_div_words(B<h>, B<l>, B<d>) divides the two word number (B<h>, B<l>) +by B<d> and returns the result. + +bn_add_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word +arrays B<ap>, B<bp> and B<rp>. It computes B<ap> + B<bp>, places the +result in B<rp>, and returns the high word (carry). + +bn_sub_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word +arrays B<ap>, B<bp> and B<rp>. It computes B<ap> - B<bp>, places the +result in B<rp>, and returns the carry (1 if B<bp> E<gt> B<ap>, 0 +otherwise). + +bn_mul_comba4(B<r>, B<a>, B<b>) operates on the 4 word arrays B<a> and +B<b> and the 8 word array B<r>. It computes B<a>*B<b> and places the +result in B<r>. + +bn_mul_comba8(B<r>, B<a>, B<b>) operates on the 8 word arrays B<a> and +B<b> and the 16 word array B<r>. It computes B<a>*B<b> and places the +result in B<r>. + +bn_sqr_comba4(B<r>, B<a>, B<b>) operates on the 4 word arrays B<a> and +B<b> and the 8 word array B<r>. + +bn_sqr_comba8(B<r>, B<a>, B<b>) operates on the 8 word arrays B<a> and +B<b> and the 16 word array B<r>. + +The following functions are implemented in C: + +bn_cmp_words(B<a>, B<b>, B<n>) operates on the B<n> word arrays B<a> +and B<b>. It returns 1, 0 and -1 if B<a> is greater than, equal and +less than B<b>. + +bn_mul_normal(B<r>, B<a>, B<na>, B<b>, B<nb>) operates on the B<na> +word array B<a>, the B<nb> word array B<b> and the B<na>+B<nb> word +array B<r>. It computes B<a>*B<b> and places the result in B<r>. + +bn_mul_low_normal(B<r>, B<a>, B<b>, B<n>) operates on the B<n> word +arrays B<r>, B<a> and B<b>. It computes the B<n> low words of +B<a>*B<b> and places the result in B<r>. + +bn_mul_recursive(B<r>, B<a>, B<b>, B<n2>, B<dna>, B<dnb>, B<t>) operates +on the word arrays B<a> and B<b> of length B<n2>+B<dna> and B<n2>+B<dnb> +(B<dna> and B<dnb> are currently allowed to be 0 or negative) and the 2*B<n2> +word arrays B<r> and B<t>. B<n2> must be a power of 2. It computes +B<a>*B<b> and places the result in B<r>. + +bn_mul_part_recursive(B<r>, B<a>, B<b>, B<n>, B<tna>, B<tnb>, B<tmp>) +operates on the word arrays B<a> and B<b> of length B<n>+B<tna> and +B<n>+B<tnb> and the 4*B<n> word arrays B<r> and B<tmp>. + +bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the +B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a> +and B<b>. + +BN_mul() calls bn_mul_normal(), or an optimized implementation if the +factors have the same size: bn_mul_comba8() is used if they are 8 +words long, bn_mul_recursive() if they are larger than +B<BN_MULL_SIZE_NORMAL> and the size is an exact multiple of the word +size, and bn_mul_part_recursive() for others that are larger than +B<BN_MULL_SIZE_NORMAL>. + +bn_sqr_normal(B<r>, B<a>, B<n>, B<tmp>) operates on the B<n> word array +B<a> and the 2*B<n> word arrays B<tmp> and B<r>. + +The implementations use the following macros which, depending on the +architecture, may use "long long" C operations or inline assembler. +They are defined in C<bn_lcl.h>. + +mul(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<c> and places the +low word of the result in B<r> and the high word in B<c>. + +mul_add(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<r>+B<c> and +places the low word of the result in B<r> and the high word in B<c>. + +sqr(B<r0>, B<r1>, B<a>) computes B<a>*B<a> and places the low word +of the result in B<r0> and the high word in B<r1>. + +=head2 Size changes + +bn_expand() ensures that B<b> has enough space for a B<bits> bit +number. bn_wexpand() ensures that B<b> has enough space for an +B<n> word number. If the number has to be expanded, both macros +call bn_expand2(), which allocates a new B<d> array and copies the +data. They return B<NULL> on error, B<b> otherwise. + +The bn_fix_top() macro reduces B<a-E<gt>top> to point to the most +significant non-zero word plus one when B<a> has shrunk. + +=head2 Debugging + +bn_check_top() verifies that C<((a)-E<gt>top E<gt>= 0 && (a)-E<gt>top +E<lt>= (a)-E<gt>dmax)>. A violation will cause the program to abort. + +bn_print() prints B<a> to stderr. bn_dump() prints B<n> words at B<d> +(in reverse order, i.e. most significant word first) to stderr. + +bn_set_max() makes B<a> a static number with a B<dmax> of its current size. +This is used by bn_set_low() and bn_set_high() to make B<r> a read-only +B<BIGNUM> that contains the B<n> low or high words of B<a>. + +If B<BN_DEBUG> is not defined, bn_check_top(), bn_print(), bn_dump() +and bn_set_max() are defined as empty macros. + +=head1 SEE ALSO + +L<bn(3)> + +=head1 COPYRIGHT + +Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved. + +Licensed under the OpenSSL license (the "License"). You may not use +this file except in compliance with the License. You can obtain a copy +in the file LICENSE in the source distribution or at +L<https://www.openssl.org/source/license.html>. + +=cut diff --git a/crypto/bn/asm/README b/crypto/bn/asm/README deleted file mode 100644 index b0f3a68a06ab..000000000000 --- a/crypto/bn/asm/README +++ /dev/null @@ -1,27 +0,0 @@ -<OBSOLETE> - -All assember in this directory are just version of the file -crypto/bn/bn_asm.c. - -Quite a few of these files are just the assember output from gcc since on -quite a few machines they are 2 times faster than the system compiler. - -For the x86, I have hand written assember because of the bad job all -compilers seem to do on it. This normally gives a 2 time speed up in the RSA -routines. - -For the DEC alpha, I also hand wrote the assember (except the division which -is just the output from the C compiler pasted on the end of the file). -On the 2 alpha C compilers I had access to, it was not possible to do -64b x 64b -> 128b calculations (both long and the long long data types -were 64 bits). So the hand assember gives access to the 128 bit result and -a 2 times speedup :-). - -There are 3 versions of assember for the HP PA-RISC. - -pa-risc.s is the origional one which works fine and generated using gcc :-) - -pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations -by Chris Ruemmler from HP (with some help from the HP C compiler). - -</OBSOLETE> diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index 72381a77240c..7a0cdb2e8a00 100755 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -29,17 +36,34 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) +.syntax unified +.thumb +#else .code 32 +#endif ___ ################ # private interface to mul_1x1_ialu @@ -120,11 +144,17 @@ mul_1x1_ialu: eor $hi,$hi,$t0,lsr#8 ldr $t0,[sp,$i0] @ tab[b >> 30 ] +#ifdef __thumb2__ + itt ne +#endif eorne $lo,$lo,$b,lsl#30 eorne $hi,$hi,$b,lsr#2 tst $a,#1<<31 eor $lo,$lo,$t1,lsl#27 eor $hi,$hi,$t1,lsr#5 +#ifdef __thumb2__ + itt ne +#endif eorne $lo,$lo,$b,lsl#31 eorne $hi,$hi,$b,lsr#1 eor $lo,$lo,$t0,lsl#30 @@ -144,20 +174,33 @@ $code.=<<___; .align 5 bn_GF2m_mul_2x2: #if __ARM_MAX_ARCH__>=7 + stmdb sp!,{r10,lr} ldr r12,.LOPENSSL_armcap -.Lpic: ldr r12,[pc,r12] - tst r12,#1 + adr r10,.LOPENSSL_armcap + ldr r12,[r12,r10] +#ifdef __APPLE__ + ldr r12,[r12] +#endif + tst r12,#ARMV7_NEON + itt ne + ldrne r10,[sp],#8 bne .LNEON + stmdb sp!,{r4-r9} +#else + stmdb sp!,{r4-r10,lr} #endif ___ $ret="r10"; # reassigned 1st argument $code.=<<___; - stmdb sp!,{r4-r10,lr} mov $ret,r0 @ reassign 1st argument mov $b,r3 @ $b=b1 + sub r7,sp,#36 + mov r8,sp + and r7,r7,#-32 ldr r3,[sp,#32] @ load b0 mov $mask,#7<<2 - sub sp,sp,#32 @ allocate tab[8] + mov sp,r7 @ allocate tab[8] + str r8,[r7,#32] bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] @@ -181,6 +224,7 @@ ___ $code.=<<___; ldmia $ret,{@r[0]-@r[3]} eor $lo,$lo,$hi + ldr sp,[sp,#32] @ destroy tab[8] eor $hi,$hi,@r[1] eor $lo,$lo,@r[0] eor $hi,$hi,@r[2] @@ -188,7 +232,6 @@ $code.=<<___; eor $hi,$hi,@r[3] str $hi,[$ret,#8] eor $lo,$lo,$hi - add sp,sp,#32 @ destroy tab[8] str $lo,[$ret,#4] #if __ARM_ARCH__>=5 @@ -213,8 +256,8 @@ $code.=<<___; .align 5 .LNEON: ldr r12, [sp] @ 5th argument - vmov.32 $a, r2, r1 - vmov.32 $b, r12, r3 + vmov $a, r2, r1 + vmov $b, r12, r3 vmov.i64 $k48, #0x0000ffffffffffff vmov.i64 $k32, #0x00000000ffffffff vmov.i64 $k16, #0x000000000000ffff @@ -267,7 +310,7 @@ $code.=<<___; #if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-(.Lpic+8) +.word OPENSSL_armcap_P-. #endif .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 5 diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl index 1d330e9f8aa3..6bedc62ba62d 100755 --- a/crypto/bn/asm/armv4-mont.pl +++ b/crypto/bn/asm/armv4-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -16,7 +23,7 @@ # [depending on key length, less for longer keys] on ARM920T, and # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code # base and compiler generated code with in-lined umull and even umlal -# instructions. The latter means that this code didn't really have an +# instructions. The latter means that this code didn't really have an # "advantage" of utilizing some "secret" instruction. # # The code is interoperable with Thumb ISA and is rather compact, less @@ -38,8 +45,29 @@ # for execution on all NEON-capable processors, because gain on # others outweighs the marginal loss on Cortex-A9. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +# September 2015 +# +# Align Cortex-A9 performance with November 2013 improvements, i.e. +# NEON code is now ~20-105% faster than integer-only one on this +# processor. But this optimization further improved performance even +# on other processors: NEON code path is ~45-180% faster than original +# integer-only on Cortex-A8, ~10-210% on Cortex-A15, ~70-450% on +# Snapdragon S4. + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $num="r0"; # starts as num argument, but holds &tp[num-1] $ap="r1"; @@ -70,12 +98,17 @@ $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) +.syntax unified +.thumb +#else .code 32 +#endif #if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-bn_mul_mont +.word OPENSSL_armcap_P-.Lbn_mul_mont #endif .global bn_mul_mont @@ -83,15 +116,19 @@ $code=<<___; .align 5 bn_mul_mont: +.Lbn_mul_mont: ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block #if __ARM_MAX_ARCH__>=7 tst ip,#7 bne .Lialu - adr r0,bn_mul_mont + adr r0,.Lbn_mul_mont ldr r2,.LOPENSSL_armcap ldr r0,[r0,r2] - tst r0,#1 @ NEON available? +#ifdef __APPLE__ + ldr r0,[r0] +#endif + tst r0,#ARMV7_NEON @ NEON available? ldmia sp, {r0,r2} beq .Lialu add sp,sp,#8 @@ -101,6 +138,9 @@ bn_mul_mont: #endif cmp ip,#2 mov $num,ip @ load num +#ifdef __thumb2__ + ittt lt +#endif movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -148,10 +188,11 @@ bn_mul_mont: ldr $n0,[$_n0] @ restore n0 adc $nhi,$nhi,#0 str $nlo,[$num] @ tp[num-1]= + mov $tj,sp str $nhi,[$num,#4] @ tp[num]= .Louter: - sub $tj,$num,sp @ "original" $num-1 value + sub $tj,$num,$tj @ "original" $num-1 value sub $ap,$ap,$tj @ "rewind" ap to &ap[1] ldr $bi,[$tp,#4]! @ *(++bp) sub $np,$np,$tj @ "rewind" np to &np[1] @@ -196,11 +237,16 @@ bn_mul_mont: str $nhi,[$num,#4] @ tp[num]= cmp $tp,$tj +#ifdef __thumb2__ + itt ne +#endif + movne $tj,sp bne .Louter ldr $rp,[$_rp] @ pull rp + mov $aj,sp add $num,$num,#4 @ $num to point at &tp[num] - sub $aj,$num,sp @ "original" num value + sub $aj,$num,$aj @ "original" num value mov $tp,sp @ "rewind" $tp mov $ap,$tp @ "borrow" $ap sub $np,$np,$aj @ "rewind" $np to &np[0] @@ -216,17 +262,19 @@ bn_mul_mont: mov $tp,sp @ "rewind" $tp sub $rp,$rp,$aj @ "rewind" $rp - and $ap,$tp,$nhi - bic $np,$rp,$nhi - orr $ap,$ap,$np @ ap=borrow?tp:rp - -.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh +.Lcopy: ldr $tj,[$tp] @ conditional copy + ldr $aj,[$rp] str sp,[$tp],#4 @ zap tp - str $tj,[$rp],#4 - cmp $tp,$num +#ifdef __thumb2__ + it cc +#endif + movcc $aj,$tj + str $aj,[$rp],#4 + teq $tp,$num @ preserve carry bne .Lcopy - add sp,$num,#4 @ skip over tp[num+1] + mov sp,$num + add sp,sp,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 @@ -241,19 +289,16 @@ bn_mul_mont: .size bn_mul_mont,.-bn_mul_mont ___ { -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); my ($N0,$N1,$N2,$N3)=map("d$_",(4..7)); my ($Z,$Temp)=("q4","q5"); -my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13)); +my @ACC=map("q$_",(6..13)); my ($Bi,$Ni,$M0)=map("d$_",(28..31)); -my $zero=&Dlo($Z); -my $temp=&Dlo($Temp); +my $zero="$Z#lo"; +my $temp="$Temp#lo"; my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5)); -my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); +my ($tinptr,$toutptr,$inner,$outer,$bnptr)=map("r$_",(6..11)); $code.=<<___; #if __ARM_MAX_ARCH__>=7 @@ -267,60 +312,60 @@ bn_mul8x_mont_neon: stmdb sp!,{r4-r11} vstmdb sp!,{d8-d15} @ ABI specification says so ldmia ip,{r4-r5} @ load rest of parameter block + mov ip,sp + + cmp $num,#8 + bhi .LNEON_8n + + @ special case for $num==8, everything is in register bank... - sub $toutptr,sp,#16 vld1.32 {${Bi}[0]}, [$bptr,:32]! - sub $toutptr,$toutptr,$num,lsl#4 + veor $zero,$zero,$zero + sub $toutptr,sp,$num,lsl#4 vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-( and $toutptr,$toutptr,#-64 vld1.32 {${M0}[0]}, [$n0,:32] mov sp,$toutptr @ alloca - veor $zero,$zero,$zero - subs $inner,$num,#8 vzip.16 $Bi,$zero - vmull.u32 $A0xB,$Bi,${A0}[0] - vmull.u32 $A1xB,$Bi,${A0}[1] - vmull.u32 $A2xB,$Bi,${A1}[0] - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - vmull.u32 $A3xB,$Bi,${A1}[1] + vmull.u32 @ACC[0],$Bi,${A0}[0] + vmull.u32 @ACC[1],$Bi,${A0}[1] + vmull.u32 @ACC[2],$Bi,${A1}[0] + vshl.i64 $Ni,@ACC[0]#hi,#16 + vmull.u32 @ACC[3],$Bi,${A1}[1] - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + vadd.u64 $Ni,$Ni,@ACC[0]#lo veor $zero,$zero,$zero - vmul.u32 $Ni,$temp,$M0 + vmul.u32 $Ni,$Ni,$M0 - vmull.u32 $A4xB,$Bi,${A2}[0] + vmull.u32 @ACC[4],$Bi,${A2}[0] vld1.32 {$N0-$N3}, [$nptr]! - vmull.u32 $A5xB,$Bi,${A2}[1] - vmull.u32 $A6xB,$Bi,${A3}[0] + vmull.u32 @ACC[5],$Bi,${A2}[1] + vmull.u32 @ACC[6],$Bi,${A3}[0] vzip.16 $Ni,$zero - vmull.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_1st - - @ special case for num=8, everything is in register bank... + vmull.u32 @ACC[7],$Bi,${A3}[1] - vmlal.u32 $A0xB,$Ni,${N0}[0] + vmlal.u32 @ACC[0],$Ni,${N0}[0] sub $outer,$num,#1 - vmlal.u32 $A1xB,$Ni,${N0}[1] - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vmov $Temp,$A0xB - vmlal.u32 $A5xB,$Ni,${N2}[1] - vmov $A0xB,$A1xB - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmov $A1xB,$A2xB - vmlal.u32 $A7xB,$Ni,${N3}[1] - vmov $A2xB,$A3xB - vmov $A3xB,$A4xB + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vmlal.u32 @ACC[2],$Ni,${N1}[0] + vmlal.u32 @ACC[3],$Ni,${N1}[1] + + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vmov $Temp,@ACC[0] + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vmov @ACC[0],@ACC[1] + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vmov @ACC[1],@ACC[2] + vmlal.u32 @ACC[7],$Ni,${N3}[1] + vmov @ACC[2],@ACC[3] + vmov @ACC[3],@ACC[4] vshr.u64 $temp,$temp,#16 - vmov $A4xB,$A5xB - vmov $A5xB,$A6xB - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vmov $A6xB,$A7xB - veor $A7xB,$A7xB + vmov @ACC[4],@ACC[5] + vmov @ACC[5],@ACC[6] + vadd.u64 $temp,$temp,$Temp#hi + vmov @ACC[6],@ACC[7] + veor @ACC[7],@ACC[7] vshr.u64 $temp,$temp,#16 b .LNEON_outer8 @@ -330,279 +375,302 @@ bn_mul8x_mont_neon: vld1.32 {${Bi}[0]}, [$bptr,:32]! veor $zero,$zero,$zero vzip.16 $Bi,$zero - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp - vmlal.u32 $A0xB,$Bi,${A0}[0] - vmlal.u32 $A1xB,$Bi,${A0}[1] - vmlal.u32 $A2xB,$Bi,${A1}[0] - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - vmlal.u32 $A3xB,$Bi,${A1}[1] + vmlal.u32 @ACC[0],$Bi,${A0}[0] + vmlal.u32 @ACC[1],$Bi,${A0}[1] + vmlal.u32 @ACC[2],$Bi,${A1}[0] + vshl.i64 $Ni,@ACC[0]#hi,#16 + vmlal.u32 @ACC[3],$Bi,${A1}[1] - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + vadd.u64 $Ni,$Ni,@ACC[0]#lo veor $zero,$zero,$zero subs $outer,$outer,#1 - vmul.u32 $Ni,$temp,$M0 + vmul.u32 $Ni,$Ni,$M0 - vmlal.u32 $A4xB,$Bi,${A2}[0] - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] + vmlal.u32 @ACC[4],$Bi,${A2}[0] + vmlal.u32 @ACC[5],$Bi,${A2}[1] + vmlal.u32 @ACC[6],$Bi,${A3}[0] vzip.16 $Ni,$zero - vmlal.u32 $A7xB,$Bi,${A3}[1] - - vmlal.u32 $A0xB,$Ni,${N0}[0] - vmlal.u32 $A1xB,$Ni,${N0}[1] - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vmov $Temp,$A0xB - vmlal.u32 $A5xB,$Ni,${N2}[1] - vmov $A0xB,$A1xB - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmov $A1xB,$A2xB - vmlal.u32 $A7xB,$Ni,${N3}[1] - vmov $A2xB,$A3xB - vmov $A3xB,$A4xB + vmlal.u32 @ACC[7],$Bi,${A3}[1] + + vmlal.u32 @ACC[0],$Ni,${N0}[0] + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vmlal.u32 @ACC[2],$Ni,${N1}[0] + vmlal.u32 @ACC[3],$Ni,${N1}[1] + + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vmov $Temp,@ACC[0] + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vmov @ACC[0],@ACC[1] + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vmov @ACC[1],@ACC[2] + vmlal.u32 @ACC[7],$Ni,${N3}[1] + vmov @ACC[2],@ACC[3] + vmov @ACC[3],@ACC[4] vshr.u64 $temp,$temp,#16 - vmov $A4xB,$A5xB - vmov $A5xB,$A6xB - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vmov $A6xB,$A7xB - veor $A7xB,$A7xB + vmov @ACC[4],@ACC[5] + vmov @ACC[5],@ACC[6] + vadd.u64 $temp,$temp,$Temp#hi + vmov @ACC[6],@ACC[7] + veor @ACC[7],@ACC[7] vshr.u64 $temp,$temp,#16 bne .LNEON_outer8 - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp mov $toutptr,sp - vshr.u64 $temp,`&Dlo("$A0xB")`,#16 + vshr.u64 $temp,@ACC[0]#lo,#16 mov $inner,$num - vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp - add $tinptr,sp,#16 - vshr.u64 $temp,`&Dhi("$A0xB")`,#16 - vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` + vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp + add $tinptr,sp,#96 + vshr.u64 $temp,@ACC[0]#hi,#16 + vzip.16 @ACC[0]#lo,@ACC[0]#hi - b .LNEON_tail2 + b .LNEON_tail_entry .align 4 -.LNEON_1st: - vmlal.u32 $A0xB,$Ni,${N0}[0] - vld1.32 {$A0-$A3}, [$aptr]! - vmlal.u32 $A1xB,$Ni,${N0}[1] +.LNEON_8n: + veor @ACC[0],@ACC[0],@ACC[0] + sub $toutptr,sp,#128 + veor @ACC[1],@ACC[1],@ACC[1] + sub $toutptr,$toutptr,$num,lsl#4 + veor @ACC[2],@ACC[2],@ACC[2] + and $toutptr,$toutptr,#-64 + veor @ACC[3],@ACC[3],@ACC[3] + mov sp,$toutptr @ alloca + veor @ACC[4],@ACC[4],@ACC[4] + add $toutptr,$toutptr,#256 + veor @ACC[5],@ACC[5],@ACC[5] + sub $inner,$num,#8 + veor @ACC[6],@ACC[6],@ACC[6] + veor @ACC[7],@ACC[7],@ACC[7] + +.LNEON_8n_init: + vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]! subs $inner,$inner,#8 - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vld1.32 {$N0-$N1}, [$nptr]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vmlal.u32 $A7xB,$Ni,${N3}[1] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - - vmull.u32 $A0xB,$Bi,${A0}[0] - vld1.32 {$N2-$N3}, [$nptr]! - vmull.u32 $A1xB,$Bi,${A0}[1] - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vmull.u32 $A2xB,$Bi,${A1}[0] - vmull.u32 $A3xB,$Bi,${A1}[1] - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - - vmull.u32 $A4xB,$Bi,${A2}[0] - vmull.u32 $A5xB,$Bi,${A2}[1] - vmull.u32 $A6xB,$Bi,${A3}[0] - vmull.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_1st - - vmlal.u32 $A0xB,$Ni,${N0}[0] - add $tinptr,sp,#16 - vmlal.u32 $A1xB,$Ni,${N0}[1] - sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr - vmlal.u32 $A2xB,$Ni,${N1}[0] - vld1.64 {$Temp}, [sp,:128] - vmlal.u32 $A3xB,$Ni,${N1}[1] - sub $outer,$num,#1 - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vshr.u64 $temp,$temp,#16 - vld1.64 {$A0xB}, [$tinptr, :128]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - veor $Z,$Z,$Z - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vst1.64 {$Z}, [$toutptr,:128] - vshr.u64 $temp,$temp,#16 - - b .LNEON_outer + vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]! + vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]! + vst1.64 {@ACC[6]-@ACC[7]},[$toutptr,:256]! + bne .LNEON_8n_init + + add $tinptr,sp,#256 + vld1.32 {$A0-$A3},[$aptr]! + add $bnptr,sp,#8 + vld1.32 {${M0}[0]},[$n0,:32] + mov $outer,$num + b .LNEON_8n_outer .align 4 -.LNEON_outer: - vld1.32 {${Bi}[0]}, [$bptr,:32]! - sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr - vld1.32 {$A0-$A3}, [$aptr]! +.LNEON_8n_outer: + vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++ veor $zero,$zero,$zero - mov $toutptr,sp vzip.16 $Bi,$zero + add $toutptr,sp,#128 + vld1.32 {$N0-$N3},[$nptr]! + + vmlal.u32 @ACC[0],$Bi,${A0}[0] + vmlal.u32 @ACC[1],$Bi,${A0}[1] + veor $zero,$zero,$zero + vmlal.u32 @ACC[2],$Bi,${A1}[0] + vshl.i64 $Ni,@ACC[0]#hi,#16 + vmlal.u32 @ACC[3],$Bi,${A1}[1] + vadd.u64 $Ni,$Ni,@ACC[0]#lo + vmlal.u32 @ACC[4],$Bi,${A2}[0] + vmul.u32 $Ni,$Ni,$M0 + vmlal.u32 @ACC[5],$Bi,${A2}[1] + vst1.32 {$Bi},[sp,:64] @ put aside smashed b[8*i+0] + vmlal.u32 @ACC[6],$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 @ACC[7],$Bi,${A3}[1] +___ +for ($i=0; $i<7;) { +$code.=<<___; + vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++ + vmlal.u32 @ACC[0],$Ni,${N0}[0] + veor $temp,$temp,$temp + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vzip.16 $Bi,$temp + vmlal.u32 @ACC[2],$Ni,${N1}[0] + vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16 + vmlal.u32 @ACC[3],$Ni,${N1}[1] + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16 + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vmlal.u32 @ACC[7],$Ni,${N3}[1] + vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo + vst1.32 {$Ni},[$bnptr,:64]! @ put aside smashed m[8*i+$i] +___ + push(@ACC,shift(@ACC)); $i++; +$code.=<<___; + vmlal.u32 @ACC[0],$Bi,${A0}[0] + vld1.64 {@ACC[7]},[$tinptr,:128]! + vmlal.u32 @ACC[1],$Bi,${A0}[1] + veor $zero,$zero,$zero + vmlal.u32 @ACC[2],$Bi,${A1}[0] + vshl.i64 $Ni,@ACC[0]#hi,#16 + vmlal.u32 @ACC[3],$Bi,${A1}[1] + vadd.u64 $Ni,$Ni,@ACC[0]#lo + vmlal.u32 @ACC[4],$Bi,${A2}[0] + vmul.u32 $Ni,$Ni,$M0 + vmlal.u32 @ACC[5],$Bi,${A2}[1] + vst1.32 {$Bi},[$bnptr,:64]! @ put aside smashed b[8*i+$i] + vmlal.u32 @ACC[6],$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 @ACC[7],$Bi,${A3}[1] +___ +} +$code.=<<___; + vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0] + vmlal.u32 @ACC[0],$Ni,${N0}[0] + vld1.32 {$A0-$A3},[$aptr]! + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vmlal.u32 @ACC[2],$Ni,${N1}[0] + vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16 + vmlal.u32 @ACC[3],$Ni,${N1}[1] + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16 + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vmlal.u32 @ACC[7],$Ni,${N3}[1] + vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo + vst1.32 {$Ni},[$bnptr,:64] @ put aside smashed m[8*i+$i] + add $bnptr,sp,#8 @ rewind +___ + push(@ACC,shift(@ACC)); +$code.=<<___; sub $inner,$num,#8 - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - - vmlal.u32 $A0xB,$Bi,${A0}[0] - vld1.64 {$A3xB-$A4xB},[$tinptr,:256]! - vmlal.u32 $A1xB,$Bi,${A0}[1] - vmlal.u32 $A2xB,$Bi,${A1}[0] - vld1.64 {$A5xB-$A6xB},[$tinptr,:256]! - vmlal.u32 $A3xB,$Bi,${A1}[1] - - vshl.i64 $temp,`&Dhi("$A0xB")`,#16 - veor $zero,$zero,$zero - vadd.u64 $temp,$temp,`&Dlo("$A0xB")` - vld1.64 {$A7xB},[$tinptr,:128]! - vmul.u32 $Ni,$temp,$M0 - - vmlal.u32 $A4xB,$Bi,${A2}[0] - vld1.32 {$N0-$N3}, [$nptr]! - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] - vzip.16 $Ni,$zero - vmlal.u32 $A7xB,$Bi,${A3}[1] - -.LNEON_inner: - vmlal.u32 $A0xB,$Ni,${N0}[0] - vld1.32 {$A0-$A3}, [$aptr]! - vmlal.u32 $A1xB,$Ni,${N0}[1] - subs $inner,$inner,#8 - vmlal.u32 $A2xB,$Ni,${N1}[0] - vmlal.u32 $A3xB,$Ni,${N1}[1] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - - vmlal.u32 $A4xB,$Ni,${N2}[0] - vld1.64 {$A0xB}, [$tinptr, :128]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - - vmlal.u32 $A0xB,$Bi,${A0}[0] - vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! - vmlal.u32 $A1xB,$Bi,${A0}[1] - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vmlal.u32 $A2xB,$Bi,${A1}[0] - vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! - vmlal.u32 $A3xB,$Bi,${A1}[1] - vld1.32 {$N0-$N3}, [$nptr]! - - vmlal.u32 $A4xB,$Bi,${A2}[0] - vld1.64 {$A7xB}, [$tinptr, :128]! - vmlal.u32 $A5xB,$Bi,${A2}[1] - vmlal.u32 $A6xB,$Bi,${A3}[0] - vmlal.u32 $A7xB,$Bi,${A3}[1] - - bne .LNEON_inner - - vmlal.u32 $A0xB,$Ni,${N0}[0] - add $tinptr,sp,#16 - vmlal.u32 $A1xB,$Ni,${N0}[1] - sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr - vmlal.u32 $A2xB,$Ni,${N1}[0] - vld1.64 {$Temp}, [sp,:128] - vmlal.u32 $A3xB,$Ni,${N1}[1] - subs $outer,$outer,#1 + b .LNEON_8n_inner - vmlal.u32 $A4xB,$Ni,${N2}[0] - vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! - vmlal.u32 $A5xB,$Ni,${N2}[1] - vld1.64 {$A0xB}, [$tinptr, :128]! - vshr.u64 $temp,$temp,#16 - vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! - vmlal.u32 $A6xB,$Ni,${N3}[0] - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vmlal.u32 $A7xB,$Ni,${N3}[1] - - vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! - vadd.u64 $temp,$temp,`&Dhi("$Temp")` - vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! - vshr.u64 $temp,$temp,#16 - - bne .LNEON_outer +.align 4 +.LNEON_8n_inner: + subs $inner,$inner,#8 + vmlal.u32 @ACC[0],$Bi,${A0}[0] + vld1.64 {@ACC[7]},[$tinptr,:128] + vmlal.u32 @ACC[1],$Bi,${A0}[1] + vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+0] + vmlal.u32 @ACC[2],$Bi,${A1}[0] + vld1.32 {$N0-$N3},[$nptr]! + vmlal.u32 @ACC[3],$Bi,${A1}[1] + it ne + addne $tinptr,$tinptr,#16 @ don't advance in last iteration + vmlal.u32 @ACC[4],$Bi,${A2}[0] + vmlal.u32 @ACC[5],$Bi,${A2}[1] + vmlal.u32 @ACC[6],$Bi,${A3}[0] + vmlal.u32 @ACC[7],$Bi,${A3}[1] +___ +for ($i=1; $i<8; $i++) { +$code.=<<___; + vld1.32 {$Bi},[$bnptr,:64]! @ pull smashed b[8*i+$i] + vmlal.u32 @ACC[0],$Ni,${N0}[0] + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vmlal.u32 @ACC[2],$Ni,${N1}[0] + vmlal.u32 @ACC[3],$Ni,${N1}[1] + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vmlal.u32 @ACC[7],$Ni,${N3}[1] + vst1.64 {@ACC[0]},[$toutptr,:128]! +___ + push(@ACC,shift(@ACC)); +$code.=<<___; + vmlal.u32 @ACC[0],$Bi,${A0}[0] + vld1.64 {@ACC[7]},[$tinptr,:128] + vmlal.u32 @ACC[1],$Bi,${A0}[1] + vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+$i] + vmlal.u32 @ACC[2],$Bi,${A1}[0] + it ne + addne $tinptr,$tinptr,#16 @ don't advance in last iteration + vmlal.u32 @ACC[3],$Bi,${A1}[1] + vmlal.u32 @ACC[4],$Bi,${A2}[0] + vmlal.u32 @ACC[5],$Bi,${A2}[1] + vmlal.u32 @ACC[6],$Bi,${A3}[0] + vmlal.u32 @ACC[7],$Bi,${A3}[1] +___ +} +$code.=<<___; + it eq + subeq $aptr,$aptr,$num,lsl#2 @ rewind + vmlal.u32 @ACC[0],$Ni,${N0}[0] + vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0] + vmlal.u32 @ACC[1],$Ni,${N0}[1] + vld1.32 {$A0-$A3},[$aptr]! + vmlal.u32 @ACC[2],$Ni,${N1}[0] + add $bnptr,sp,#8 @ rewind + vmlal.u32 @ACC[3],$Ni,${N1}[1] + vmlal.u32 @ACC[4],$Ni,${N2}[0] + vmlal.u32 @ACC[5],$Ni,${N2}[1] + vmlal.u32 @ACC[6],$Ni,${N3}[0] + vst1.64 {@ACC[0]},[$toutptr,:128]! + vmlal.u32 @ACC[7],$Ni,${N3}[1] + + bne .LNEON_8n_inner +___ + push(@ACC,shift(@ACC)); +$code.=<<___; + add $tinptr,sp,#128 + vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]! + veor q2,q2,q2 @ $N0-$N1 + vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]! + veor q3,q3,q3 @ $N2-$N3 + vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]! + vst1.64 {@ACC[6]},[$toutptr,:128] + + subs $outer,$outer,#8 + vld1.64 {@ACC[0]-@ACC[1]},[$tinptr,:256]! + vld1.64 {@ACC[2]-@ACC[3]},[$tinptr,:256]! + vld1.64 {@ACC[4]-@ACC[5]},[$tinptr,:256]! + vld1.64 {@ACC[6]-@ACC[7]},[$tinptr,:256]! + + itt ne + subne $nptr,$nptr,$num,lsl#2 @ rewind + bne .LNEON_8n_outer + + add $toutptr,sp,#128 + vst1.64 {q2-q3}, [sp,:256]! @ start wiping stack frame + vshr.u64 $temp,@ACC[0]#lo,#16 + vst1.64 {q2-q3},[sp,:256]! + vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp + vst1.64 {q2-q3}, [sp,:256]! + vshr.u64 $temp,@ACC[0]#hi,#16 + vst1.64 {q2-q3}, [sp,:256]! + vzip.16 @ACC[0]#lo,@ACC[0]#hi - mov $toutptr,sp mov $inner,$num + b .LNEON_tail_entry +.align 4 .LNEON_tail: - vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp - vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dlo("$A0xB")`,#16 - vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp - vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dhi("$A0xB")`,#16 - vld1.64 {$A7xB}, [$tinptr, :128]! - vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` - -.LNEON_tail2: - vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp - vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A1xB")`,#16 - vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp - vshr.u64 $temp,`&Dhi("$A1xB")`,#16 - vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")` - - vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp - vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A2xB")`,#16 - vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp - vshr.u64 $temp,`&Dhi("$A2xB")`,#16 - vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")` - - vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp - vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A3xB")`,#16 - vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp - vshr.u64 $temp,`&Dhi("$A3xB")`,#16 - vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")` - - vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp - vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A4xB")`,#16 - vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp - vshr.u64 $temp,`&Dhi("$A4xB")`,#16 - vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")` - - vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp - vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A5xB")`,#16 - vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp - vshr.u64 $temp,`&Dhi("$A5xB")`,#16 - vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")` - - vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp - vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A6xB")`,#16 - vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp - vld1.64 {$A0xB}, [$tinptr, :128]! - vshr.u64 $temp,`&Dhi("$A6xB")`,#16 - vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")` - - vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp - vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! - vshr.u64 $temp,`&Dlo("$A7xB")`,#16 - vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp - vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! - vshr.u64 $temp,`&Dhi("$A7xB")`,#16 - vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")` + vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp + vshr.u64 $temp,@ACC[0]#lo,#16 + vld1.64 {@ACC[2]-@ACC[3]}, [$tinptr, :256]! + vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp + vld1.64 {@ACC[4]-@ACC[5]}, [$tinptr, :256]! + vshr.u64 $temp,@ACC[0]#hi,#16 + vld1.64 {@ACC[6]-@ACC[7]}, [$tinptr, :256]! + vzip.16 @ACC[0]#lo,@ACC[0]#hi + +.LNEON_tail_entry: +___ +for ($i=1; $i<8; $i++) { +$code.=<<___; + vadd.u64 @ACC[1]#lo,@ACC[1]#lo,$temp + vst1.32 {@ACC[0]#lo[0]}, [$toutptr, :32]! + vshr.u64 $temp,@ACC[1]#lo,#16 + vadd.u64 @ACC[1]#hi,@ACC[1]#hi,$temp + vshr.u64 $temp,@ACC[1]#hi,#16 + vzip.16 @ACC[1]#lo,@ACC[1]#hi +___ + push(@ACC,shift(@ACC)); +} + push(@ACC,shift(@ACC)); +$code.=<<___; + vld1.64 {@ACC[0]-@ACC[1]}, [$tinptr, :256]! subs $inner,$inner,#8 - vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! - + vst1.32 {@ACC[7]#lo[0]}, [$toutptr, :32]! bne .LNEON_tail vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit @@ -622,8 +690,9 @@ bn_mul8x_mont_neon: bne .LNEON_sub ldr r10, [$aptr] @ load top-most bit + mov r11,sp veor q0,q0,q0 - sub r11,$bptr,sp @ this is num*4 + sub r11,$bptr,r11 @ this is num*4 veor q1,q1,q1 mov $aptr,sp sub $rptr,$rptr,r11 @ rewind $rptr @@ -633,27 +702,33 @@ bn_mul8x_mont_neon: .LNEON_copy_n_zap: ldmia $aptr!, {r4-r7} ldmia $rptr, {r8-r11} + it cc movcc r8, r4 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + it cc movcc r11,r7 ldmia $aptr, {r4-r7} stmia $rptr!, {r8-r11} sub $aptr,$aptr,#16 ldmia $rptr, {r8-r11} + it cc movcc r8, r4 vst1.64 {q0-q1}, [$aptr,:256]! @ wipe + itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + it cc movcc r11,r7 teq $aptr,$bptr @ preserves carry stmia $rptr!, {r8-r11} bne .LNEON_copy_n_zap - sub sp,ip,#96 + mov sp,ip vldmia sp!,{d8-d15} ldmia sp!,{r4-r11} ret @ bx lr @@ -669,8 +744,14 @@ $code.=<<___; #endif ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -$code =~ s/\bret\b/bx lr/gm; -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/ge or + s/\bret\b/bx lr/g or + s/\bbx\s+lr\b/.word\t0xe12fff1e/g; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + close STDOUT; diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl new file mode 100755 index 000000000000..5d5af1b6be25 --- /dev/null +++ b/crypto/bn/asm/armv8-mont.pl @@ -0,0 +1,1510 @@ +#! /usr/bin/env perl +# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# March 2015 +# +# "Teaser" Montgomery multiplication module for ARMv8. Needs more +# work. While it does improve RSA sign performance by 20-30% (less for +# longer keys) on most processors, for some reason RSA2048 is not +# faster and RSA4096 goes 15-20% slower on Cortex-A57. Multiplication +# instruction issue rate is limited on processor in question, meaning +# that dedicated squaring procedure is a must. Well, actually all +# contemporary AArch64 processors seem to have limited multiplication +# issue rate, i.e. they can't issue multiplication every cycle, which +# explains moderate improvement coefficients in comparison to +# compiler-generated code. Recall that compiler is instructed to use +# umulh and therefore uses same amount of multiplication instructions +# to do the job. Assembly's edge is to minimize number of "collateral" +# instructions and of course instruction scheduling. +# +# April 2015 +# +# Squaring procedure that handles lengths divisible by 8 improves +# RSA/DSA performance by 25-40-60% depending on processor and key +# length. Overall improvement coefficients are always positive in +# comparison to compiler-generated code. On Cortex-A57 improvement +# is still modest on longest key lengths, while others exhibit e.g. +# 50-70% improvement for RSA4096 sign. RSA2048 sign is ~25% faster +# on Cortex-A57 and ~60-100% faster on others. + +$flavour = shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +($lo0,$hi0,$aj,$m0,$alo,$ahi, + $lo1,$hi1,$nj,$m1,$nlo,$nhi, + $ovf, $i,$j,$tp,$tj) = map("x$_",6..17,19..24); + +# int bn_mul_mont( +$rp="x0"; # BN_ULONG *rp, +$ap="x1"; # const BN_ULONG *ap, +$bp="x2"; # const BN_ULONG *bp, +$np="x3"; # const BN_ULONG *np, +$n0="x4"; # const BN_ULONG *n0, +$num="x5"; # int num); + +$code.=<<___; +.text + +.globl bn_mul_mont +.type bn_mul_mont,%function +.align 5 +bn_mul_mont: + tst $num,#7 + b.eq __bn_sqr8x_mont + tst $num,#3 + b.eq __bn_mul4x_mont +.Lmul_mont: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldr $m0,[$bp],#8 // bp[0] + sub $tp,sp,$num,lsl#3 + ldp $hi0,$aj,[$ap],#16 // ap[0..1] + lsl $num,$num,#3 + ldr $n0,[$n0] // *n0 + and $tp,$tp,#-16 // ABI says so + ldp $hi1,$nj,[$np],#16 // np[0..1] + + mul $lo0,$hi0,$m0 // ap[0]*bp[0] + sub $j,$num,#16 // j=num-2 + umulh $hi0,$hi0,$m0 + mul $alo,$aj,$m0 // ap[1]*bp[0] + umulh $ahi,$aj,$m0 + + mul $m1,$lo0,$n0 // "tp[0]"*n0 + mov sp,$tp // alloca + + // (*) mul $lo1,$hi1,$m1 // np[0]*m1 + umulh $hi1,$hi1,$m1 + mul $nlo,$nj,$m1 // np[1]*m1 + // (*) adds $lo1,$lo1,$lo0 // discarded + // (*) As for removal of first multiplication and addition + // instructions. The outcome of first addition is + // guaranteed to be zero, which leaves two computationally + // significant outcomes: it either carries or not. Then + // question is when does it carry? Is there alternative + // way to deduce it? If you follow operations, you can + // observe that condition for carry is quite simple: + // $lo0 being non-zero. So that carry can be calculated + // by adding -1 to $lo0. That's what next instruction does. + subs xzr,$lo0,#1 // (*) + umulh $nhi,$nj,$m1 + adc $hi1,$hi1,xzr + cbz $j,.L1st_skip + +.L1st: + ldr $aj,[$ap],#8 + adds $lo0,$alo,$hi0 + sub $j,$j,#8 // j-- + adc $hi0,$ahi,xzr + + ldr $nj,[$np],#8 + adds $lo1,$nlo,$hi1 + mul $alo,$aj,$m0 // ap[j]*bp[0] + adc $hi1,$nhi,xzr + umulh $ahi,$aj,$m0 + + adds $lo1,$lo1,$lo0 + mul $nlo,$nj,$m1 // np[j]*m1 + adc $hi1,$hi1,xzr + umulh $nhi,$nj,$m1 + str $lo1,[$tp],#8 // tp[j-1] + cbnz $j,.L1st + +.L1st_skip: + adds $lo0,$alo,$hi0 + sub $ap,$ap,$num // rewind $ap + adc $hi0,$ahi,xzr + + adds $lo1,$nlo,$hi1 + sub $np,$np,$num // rewind $np + adc $hi1,$nhi,xzr + + adds $lo1,$lo1,$lo0 + sub $i,$num,#8 // i=num-1 + adcs $hi1,$hi1,$hi0 + + adc $ovf,xzr,xzr // upmost overflow bit + stp $lo1,$hi1,[$tp] + +.Louter: + ldr $m0,[$bp],#8 // bp[i] + ldp $hi0,$aj,[$ap],#16 + ldr $tj,[sp] // tp[0] + add $tp,sp,#8 + + mul $lo0,$hi0,$m0 // ap[0]*bp[i] + sub $j,$num,#16 // j=num-2 + umulh $hi0,$hi0,$m0 + ldp $hi1,$nj,[$np],#16 + mul $alo,$aj,$m0 // ap[1]*bp[i] + adds $lo0,$lo0,$tj + umulh $ahi,$aj,$m0 + adc $hi0,$hi0,xzr + + mul $m1,$lo0,$n0 + sub $i,$i,#8 // i-- + + // (*) mul $lo1,$hi1,$m1 // np[0]*m1 + umulh $hi1,$hi1,$m1 + mul $nlo,$nj,$m1 // np[1]*m1 + // (*) adds $lo1,$lo1,$lo0 + subs xzr,$lo0,#1 // (*) + umulh $nhi,$nj,$m1 + cbz $j,.Linner_skip + +.Linner: + ldr $aj,[$ap],#8 + adc $hi1,$hi1,xzr + ldr $tj,[$tp],#8 // tp[j] + adds $lo0,$alo,$hi0 + sub $j,$j,#8 // j-- + adc $hi0,$ahi,xzr + + adds $lo1,$nlo,$hi1 + ldr $nj,[$np],#8 + adc $hi1,$nhi,xzr + + mul $alo,$aj,$m0 // ap[j]*bp[i] + adds $lo0,$lo0,$tj + umulh $ahi,$aj,$m0 + adc $hi0,$hi0,xzr + + mul $nlo,$nj,$m1 // np[j]*m1 + adds $lo1,$lo1,$lo0 + umulh $nhi,$nj,$m1 + str $lo1,[$tp,#-16] // tp[j-1] + cbnz $j,.Linner + +.Linner_skip: + ldr $tj,[$tp],#8 // tp[j] + adc $hi1,$hi1,xzr + adds $lo0,$alo,$hi0 + sub $ap,$ap,$num // rewind $ap + adc $hi0,$ahi,xzr + + adds $lo1,$nlo,$hi1 + sub $np,$np,$num // rewind $np + adcs $hi1,$nhi,$ovf + adc $ovf,xzr,xzr + + adds $lo0,$lo0,$tj + adc $hi0,$hi0,xzr + + adds $lo1,$lo1,$lo0 + adcs $hi1,$hi1,$hi0 + adc $ovf,$ovf,xzr // upmost overflow bit + stp $lo1,$hi1,[$tp,#-16] + + cbnz $i,.Louter + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr $tj,[sp] // tp[0] + add $tp,sp,#8 + ldr $nj,[$np],#8 // np[0] + subs $j,$num,#8 // j=num-1 and clear borrow + mov $ap,$rp +.Lsub: + sbcs $aj,$tj,$nj // tp[j]-np[j] + ldr $tj,[$tp],#8 + sub $j,$j,#8 // j-- + ldr $nj,[$np],#8 + str $aj,[$ap],#8 // rp[j]=tp[j]-np[j] + cbnz $j,.Lsub + + sbcs $aj,$tj,$nj + sbcs $ovf,$ovf,xzr // did it borrow? + str $aj,[$ap],#8 // rp[num-1] + + ldr $tj,[sp] // tp[0] + add $tp,sp,#8 + ldr $aj,[$rp],#8 // rp[0] + sub $num,$num,#8 // num-- + nop +.Lcond_copy: + sub $num,$num,#8 // num-- + csel $nj,$tj,$aj,lo // did it borrow? + ldr $tj,[$tp],#8 + ldr $aj,[$rp],#8 + str xzr,[$tp,#-16] // wipe tp + str $nj,[$rp,#-16] + cbnz $num,.Lcond_copy + + csel $nj,$tj,$aj,lo + str xzr,[$tp,#-8] // wipe tp + str $nj,[$rp,#-8] + + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + ret +.size bn_mul_mont,.-bn_mul_mont +___ +{ +######################################################################## +# Following is ARMv8 adaptation of sqrx8x_mont from x86_64-mont5 module. + +my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("x$_",(6..13)); +my ($t0,$t1,$t2,$t3)=map("x$_",(14..17)); +my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("x$_",(19..26)); +my ($cnt,$carry,$topmost)=("x27","x28","x30"); +my ($tp,$ap_end,$na0)=($bp,$np,$carry); + +$code.=<<___; +.type __bn_sqr8x_mont,%function +.align 5 +__bn_sqr8x_mont: + cmp $ap,$bp + b.ne __bn_mul4x_mont +.Lsqr8x_mont: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp $rp,$np,[sp,#96] // offload rp and np + + ldp $a0,$a1,[$ap,#8*0] + ldp $a2,$a3,[$ap,#8*2] + ldp $a4,$a5,[$ap,#8*4] + ldp $a6,$a7,[$ap,#8*6] + + sub $tp,sp,$num,lsl#4 + lsl $num,$num,#3 + ldr $n0,[$n0] // *n0 + mov sp,$tp // alloca + sub $cnt,$num,#8*8 + b .Lsqr8x_zero_start + +.Lsqr8x_zero: + sub $cnt,$cnt,#8*8 + stp xzr,xzr,[$tp,#8*0] + stp xzr,xzr,[$tp,#8*2] + stp xzr,xzr,[$tp,#8*4] + stp xzr,xzr,[$tp,#8*6] +.Lsqr8x_zero_start: + stp xzr,xzr,[$tp,#8*8] + stp xzr,xzr,[$tp,#8*10] + stp xzr,xzr,[$tp,#8*12] + stp xzr,xzr,[$tp,#8*14] + add $tp,$tp,#8*16 + cbnz $cnt,.Lsqr8x_zero + + add $ap_end,$ap,$num + add $ap,$ap,#8*8 + mov $acc0,xzr + mov $acc1,xzr + mov $acc2,xzr + mov $acc3,xzr + mov $acc4,xzr + mov $acc5,xzr + mov $acc6,xzr + mov $acc7,xzr + mov $tp,sp + str $n0,[x29,#112] // offload n0 + + // Multiply everything but a[i]*a[i] +.align 4 +.Lsqr8x_outer_loop: + // a[1]a[0] (i) + // a[2]a[0] + // a[3]a[0] + // a[4]a[0] + // a[5]a[0] + // a[6]a[0] + // a[7]a[0] + // a[2]a[1] (ii) + // a[3]a[1] + // a[4]a[1] + // a[5]a[1] + // a[6]a[1] + // a[7]a[1] + // a[3]a[2] (iii) + // a[4]a[2] + // a[5]a[2] + // a[6]a[2] + // a[7]a[2] + // a[4]a[3] (iv) + // a[5]a[3] + // a[6]a[3] + // a[7]a[3] + // a[5]a[4] (v) + // a[6]a[4] + // a[7]a[4] + // a[6]a[5] (vi) + // a[7]a[5] + // a[7]a[6] (vii) + + mul $t0,$a1,$a0 // lo(a[1..7]*a[0]) (i) + mul $t1,$a2,$a0 + mul $t2,$a3,$a0 + mul $t3,$a4,$a0 + adds $acc1,$acc1,$t0 // t[1]+lo(a[1]*a[0]) + mul $t0,$a5,$a0 + adcs $acc2,$acc2,$t1 + mul $t1,$a6,$a0 + adcs $acc3,$acc3,$t2 + mul $t2,$a7,$a0 + adcs $acc4,$acc4,$t3 + umulh $t3,$a1,$a0 // hi(a[1..7]*a[0]) + adcs $acc5,$acc5,$t0 + umulh $t0,$a2,$a0 + adcs $acc6,$acc6,$t1 + umulh $t1,$a3,$a0 + adcs $acc7,$acc7,$t2 + umulh $t2,$a4,$a0 + stp $acc0,$acc1,[$tp],#8*2 // t[0..1] + adc $acc0,xzr,xzr // t[8] + adds $acc2,$acc2,$t3 // t[2]+lo(a[1]*a[0]) + umulh $t3,$a5,$a0 + adcs $acc3,$acc3,$t0 + umulh $t0,$a6,$a0 + adcs $acc4,$acc4,$t1 + umulh $t1,$a7,$a0 + adcs $acc5,$acc5,$t2 + mul $t2,$a2,$a1 // lo(a[2..7]*a[1]) (ii) + adcs $acc6,$acc6,$t3 + mul $t3,$a3,$a1 + adcs $acc7,$acc7,$t0 + mul $t0,$a4,$a1 + adc $acc0,$acc0,$t1 + + mul $t1,$a5,$a1 + adds $acc3,$acc3,$t2 + mul $t2,$a6,$a1 + adcs $acc4,$acc4,$t3 + mul $t3,$a7,$a1 + adcs $acc5,$acc5,$t0 + umulh $t0,$a2,$a1 // hi(a[2..7]*a[1]) + adcs $acc6,$acc6,$t1 + umulh $t1,$a3,$a1 + adcs $acc7,$acc7,$t2 + umulh $t2,$a4,$a1 + adcs $acc0,$acc0,$t3 + umulh $t3,$a5,$a1 + stp $acc2,$acc3,[$tp],#8*2 // t[2..3] + adc $acc1,xzr,xzr // t[9] + adds $acc4,$acc4,$t0 + umulh $t0,$a6,$a1 + adcs $acc5,$acc5,$t1 + umulh $t1,$a7,$a1 + adcs $acc6,$acc6,$t2 + mul $t2,$a3,$a2 // lo(a[3..7]*a[2]) (iii) + adcs $acc7,$acc7,$t3 + mul $t3,$a4,$a2 + adcs $acc0,$acc0,$t0 + mul $t0,$a5,$a2 + adc $acc1,$acc1,$t1 + + mul $t1,$a6,$a2 + adds $acc5,$acc5,$t2 + mul $t2,$a7,$a2 + adcs $acc6,$acc6,$t3 + umulh $t3,$a3,$a2 // hi(a[3..7]*a[2]) + adcs $acc7,$acc7,$t0 + umulh $t0,$a4,$a2 + adcs $acc0,$acc0,$t1 + umulh $t1,$a5,$a2 + adcs $acc1,$acc1,$t2 + umulh $t2,$a6,$a2 + stp $acc4,$acc5,[$tp],#8*2 // t[4..5] + adc $acc2,xzr,xzr // t[10] + adds $acc6,$acc6,$t3 + umulh $t3,$a7,$a2 + adcs $acc7,$acc7,$t0 + mul $t0,$a4,$a3 // lo(a[4..7]*a[3]) (iv) + adcs $acc0,$acc0,$t1 + mul $t1,$a5,$a3 + adcs $acc1,$acc1,$t2 + mul $t2,$a6,$a3 + adc $acc2,$acc2,$t3 + + mul $t3,$a7,$a3 + adds $acc7,$acc7,$t0 + umulh $t0,$a4,$a3 // hi(a[4..7]*a[3]) + adcs $acc0,$acc0,$t1 + umulh $t1,$a5,$a3 + adcs $acc1,$acc1,$t2 + umulh $t2,$a6,$a3 + adcs $acc2,$acc2,$t3 + umulh $t3,$a7,$a3 + stp $acc6,$acc7,[$tp],#8*2 // t[6..7] + adc $acc3,xzr,xzr // t[11] + adds $acc0,$acc0,$t0 + mul $t0,$a5,$a4 // lo(a[5..7]*a[4]) (v) + adcs $acc1,$acc1,$t1 + mul $t1,$a6,$a4 + adcs $acc2,$acc2,$t2 + mul $t2,$a7,$a4 + adc $acc3,$acc3,$t3 + + umulh $t3,$a5,$a4 // hi(a[5..7]*a[4]) + adds $acc1,$acc1,$t0 + umulh $t0,$a6,$a4 + adcs $acc2,$acc2,$t1 + umulh $t1,$a7,$a4 + adcs $acc3,$acc3,$t2 + mul $t2,$a6,$a5 // lo(a[6..7]*a[5]) (vi) + adc $acc4,xzr,xzr // t[12] + adds $acc2,$acc2,$t3 + mul $t3,$a7,$a5 + adcs $acc3,$acc3,$t0 + umulh $t0,$a6,$a5 // hi(a[6..7]*a[5]) + adc $acc4,$acc4,$t1 + + umulh $t1,$a7,$a5 + adds $acc3,$acc3,$t2 + mul $t2,$a7,$a6 // lo(a[7]*a[6]) (vii) + adcs $acc4,$acc4,$t3 + umulh $t3,$a7,$a6 // hi(a[7]*a[6]) + adc $acc5,xzr,xzr // t[13] + adds $acc4,$acc4,$t0 + sub $cnt,$ap_end,$ap // done yet? + adc $acc5,$acc5,$t1 + + adds $acc5,$acc5,$t2 + sub $t0,$ap_end,$num // rewinded ap + adc $acc6,xzr,xzr // t[14] + add $acc6,$acc6,$t3 + + cbz $cnt,.Lsqr8x_outer_break + + mov $n0,$a0 + ldp $a0,$a1,[$tp,#8*0] + ldp $a2,$a3,[$tp,#8*2] + ldp $a4,$a5,[$tp,#8*4] + ldp $a6,$a7,[$tp,#8*6] + adds $acc0,$acc0,$a0 + adcs $acc1,$acc1,$a1 + ldp $a0,$a1,[$ap,#8*0] + adcs $acc2,$acc2,$a2 + adcs $acc3,$acc3,$a3 + ldp $a2,$a3,[$ap,#8*2] + adcs $acc4,$acc4,$a4 + adcs $acc5,$acc5,$a5 + ldp $a4,$a5,[$ap,#8*4] + adcs $acc6,$acc6,$a6 + mov $rp,$ap + adcs $acc7,xzr,$a7 + ldp $a6,$a7,[$ap,#8*6] + add $ap,$ap,#8*8 + //adc $carry,xzr,xzr // moved below + mov $cnt,#-8*8 + + // a[8]a[0] + // a[9]a[0] + // a[a]a[0] + // a[b]a[0] + // a[c]a[0] + // a[d]a[0] + // a[e]a[0] + // a[f]a[0] + // a[8]a[1] + // a[f]a[1]........................ + // a[8]a[2] + // a[f]a[2]........................ + // a[8]a[3] + // a[f]a[3]........................ + // a[8]a[4] + // a[f]a[4]........................ + // a[8]a[5] + // a[f]a[5]........................ + // a[8]a[6] + // a[f]a[6]........................ + // a[8]a[7] + // a[f]a[7]........................ +.Lsqr8x_mul: + mul $t0,$a0,$n0 + adc $carry,xzr,xzr // carry bit, modulo-scheduled + mul $t1,$a1,$n0 + add $cnt,$cnt,#8 + mul $t2,$a2,$n0 + mul $t3,$a3,$n0 + adds $acc0,$acc0,$t0 + mul $t0,$a4,$n0 + adcs $acc1,$acc1,$t1 + mul $t1,$a5,$n0 + adcs $acc2,$acc2,$t2 + mul $t2,$a6,$n0 + adcs $acc3,$acc3,$t3 + mul $t3,$a7,$n0 + adcs $acc4,$acc4,$t0 + umulh $t0,$a0,$n0 + adcs $acc5,$acc5,$t1 + umulh $t1,$a1,$n0 + adcs $acc6,$acc6,$t2 + umulh $t2,$a2,$n0 + adcs $acc7,$acc7,$t3 + umulh $t3,$a3,$n0 + adc $carry,$carry,xzr + str $acc0,[$tp],#8 + adds $acc0,$acc1,$t0 + umulh $t0,$a4,$n0 + adcs $acc1,$acc2,$t1 + umulh $t1,$a5,$n0 + adcs $acc2,$acc3,$t2 + umulh $t2,$a6,$n0 + adcs $acc3,$acc4,$t3 + umulh $t3,$a7,$n0 + ldr $n0,[$rp,$cnt] + adcs $acc4,$acc5,$t0 + adcs $acc5,$acc6,$t1 + adcs $acc6,$acc7,$t2 + adcs $acc7,$carry,$t3 + //adc $carry,xzr,xzr // moved above + cbnz $cnt,.Lsqr8x_mul + // note that carry flag is guaranteed + // to be zero at this point + cmp $ap,$ap_end // done yet? + b.eq .Lsqr8x_break + + ldp $a0,$a1,[$tp,#8*0] + ldp $a2,$a3,[$tp,#8*2] + ldp $a4,$a5,[$tp,#8*4] + ldp $a6,$a7,[$tp,#8*6] + adds $acc0,$acc0,$a0 + ldr $n0,[$rp,#-8*8] + adcs $acc1,$acc1,$a1 + ldp $a0,$a1,[$ap,#8*0] + adcs $acc2,$acc2,$a2 + adcs $acc3,$acc3,$a3 + ldp $a2,$a3,[$ap,#8*2] + adcs $acc4,$acc4,$a4 + adcs $acc5,$acc5,$a5 + ldp $a4,$a5,[$ap,#8*4] + adcs $acc6,$acc6,$a6 + mov $cnt,#-8*8 + adcs $acc7,$acc7,$a7 + ldp $a6,$a7,[$ap,#8*6] + add $ap,$ap,#8*8 + //adc $carry,xzr,xzr // moved above + b .Lsqr8x_mul + +.align 4 +.Lsqr8x_break: + ldp $a0,$a1,[$rp,#8*0] + add $ap,$rp,#8*8 + ldp $a2,$a3,[$rp,#8*2] + sub $t0,$ap_end,$ap // is it last iteration? + ldp $a4,$a5,[$rp,#8*4] + sub $t1,$tp,$t0 + ldp $a6,$a7,[$rp,#8*6] + cbz $t0,.Lsqr8x_outer_loop + + stp $acc0,$acc1,[$tp,#8*0] + ldp $acc0,$acc1,[$t1,#8*0] + stp $acc2,$acc3,[$tp,#8*2] + ldp $acc2,$acc3,[$t1,#8*2] + stp $acc4,$acc5,[$tp,#8*4] + ldp $acc4,$acc5,[$t1,#8*4] + stp $acc6,$acc7,[$tp,#8*6] + mov $tp,$t1 + ldp $acc6,$acc7,[$t1,#8*6] + b .Lsqr8x_outer_loop + +.align 4 +.Lsqr8x_outer_break: + // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + ldp $a1,$a3,[$t0,#8*0] // recall that $t0 is &a[0] + ldp $t1,$t2,[sp,#8*1] + ldp $a5,$a7,[$t0,#8*2] + add $ap,$t0,#8*4 + ldp $t3,$t0,[sp,#8*3] + + stp $acc0,$acc1,[$tp,#8*0] + mul $acc0,$a1,$a1 + stp $acc2,$acc3,[$tp,#8*2] + umulh $a1,$a1,$a1 + stp $acc4,$acc5,[$tp,#8*4] + mul $a2,$a3,$a3 + stp $acc6,$acc7,[$tp,#8*6] + mov $tp,sp + umulh $a3,$a3,$a3 + adds $acc1,$a1,$t1,lsl#1 + extr $t1,$t2,$t1,#63 + sub $cnt,$num,#8*4 + +.Lsqr4x_shift_n_add: + adcs $acc2,$a2,$t1 + extr $t2,$t3,$t2,#63 + sub $cnt,$cnt,#8*4 + adcs $acc3,$a3,$t2 + ldp $t1,$t2,[$tp,#8*5] + mul $a4,$a5,$a5 + ldp $a1,$a3,[$ap],#8*2 + umulh $a5,$a5,$a5 + mul $a6,$a7,$a7 + umulh $a7,$a7,$a7 + extr $t3,$t0,$t3,#63 + stp $acc0,$acc1,[$tp,#8*0] + adcs $acc4,$a4,$t3 + extr $t0,$t1,$t0,#63 + stp $acc2,$acc3,[$tp,#8*2] + adcs $acc5,$a5,$t0 + ldp $t3,$t0,[$tp,#8*7] + extr $t1,$t2,$t1,#63 + adcs $acc6,$a6,$t1 + extr $t2,$t3,$t2,#63 + adcs $acc7,$a7,$t2 + ldp $t1,$t2,[$tp,#8*9] + mul $a0,$a1,$a1 + ldp $a5,$a7,[$ap],#8*2 + umulh $a1,$a1,$a1 + mul $a2,$a3,$a3 + umulh $a3,$a3,$a3 + stp $acc4,$acc5,[$tp,#8*4] + extr $t3,$t0,$t3,#63 + stp $acc6,$acc7,[$tp,#8*6] + add $tp,$tp,#8*8 + adcs $acc0,$a0,$t3 + extr $t0,$t1,$t0,#63 + adcs $acc1,$a1,$t0 + ldp $t3,$t0,[$tp,#8*3] + extr $t1,$t2,$t1,#63 + cbnz $cnt,.Lsqr4x_shift_n_add +___ +my ($np,$np_end)=($ap,$ap_end); +$code.=<<___; + ldp $np,$n0,[x29,#104] // pull np and n0 + + adcs $acc2,$a2,$t1 + extr $t2,$t3,$t2,#63 + adcs $acc3,$a3,$t2 + ldp $t1,$t2,[$tp,#8*5] + mul $a4,$a5,$a5 + umulh $a5,$a5,$a5 + stp $acc0,$acc1,[$tp,#8*0] + mul $a6,$a7,$a7 + umulh $a7,$a7,$a7 + stp $acc2,$acc3,[$tp,#8*2] + extr $t3,$t0,$t3,#63 + adcs $acc4,$a4,$t3 + extr $t0,$t1,$t0,#63 + ldp $acc0,$acc1,[sp,#8*0] + adcs $acc5,$a5,$t0 + extr $t1,$t2,$t1,#63 + ldp $a0,$a1,[$np,#8*0] + adcs $acc6,$a6,$t1 + extr $t2,xzr,$t2,#63 + ldp $a2,$a3,[$np,#8*2] + adc $acc7,$a7,$t2 + ldp $a4,$a5,[$np,#8*4] + + // Reduce by 512 bits per iteration + mul $na0,$n0,$acc0 // t[0]*n0 + ldp $a6,$a7,[$np,#8*6] + add $np_end,$np,$num + ldp $acc2,$acc3,[sp,#8*2] + stp $acc4,$acc5,[$tp,#8*4] + ldp $acc4,$acc5,[sp,#8*4] + stp $acc6,$acc7,[$tp,#8*6] + ldp $acc6,$acc7,[sp,#8*6] + add $np,$np,#8*8 + mov $topmost,xzr // initial top-most carry + mov $tp,sp + mov $cnt,#8 + +.Lsqr8x_reduction: + // (*) mul $t0,$a0,$na0 // lo(n[0-7])*lo(t[0]*n0) + mul $t1,$a1,$na0 + sub $cnt,$cnt,#1 + mul $t2,$a2,$na0 + str $na0,[$tp],#8 // put aside t[0]*n0 for tail processing + mul $t3,$a3,$na0 + // (*) adds xzr,$acc0,$t0 + subs xzr,$acc0,#1 // (*) + mul $t0,$a4,$na0 + adcs $acc0,$acc1,$t1 + mul $t1,$a5,$na0 + adcs $acc1,$acc2,$t2 + mul $t2,$a6,$na0 + adcs $acc2,$acc3,$t3 + mul $t3,$a7,$na0 + adcs $acc3,$acc4,$t0 + umulh $t0,$a0,$na0 // hi(n[0-7])*lo(t[0]*n0) + adcs $acc4,$acc5,$t1 + umulh $t1,$a1,$na0 + adcs $acc5,$acc6,$t2 + umulh $t2,$a2,$na0 + adcs $acc6,$acc7,$t3 + umulh $t3,$a3,$na0 + adc $acc7,xzr,xzr + adds $acc0,$acc0,$t0 + umulh $t0,$a4,$na0 + adcs $acc1,$acc1,$t1 + umulh $t1,$a5,$na0 + adcs $acc2,$acc2,$t2 + umulh $t2,$a6,$na0 + adcs $acc3,$acc3,$t3 + umulh $t3,$a7,$na0 + mul $na0,$n0,$acc0 // next t[0]*n0 + adcs $acc4,$acc4,$t0 + adcs $acc5,$acc5,$t1 + adcs $acc6,$acc6,$t2 + adc $acc7,$acc7,$t3 + cbnz $cnt,.Lsqr8x_reduction + + ldp $t0,$t1,[$tp,#8*0] + ldp $t2,$t3,[$tp,#8*2] + mov $rp,$tp + sub $cnt,$np_end,$np // done yet? + adds $acc0,$acc0,$t0 + adcs $acc1,$acc1,$t1 + ldp $t0,$t1,[$tp,#8*4] + adcs $acc2,$acc2,$t2 + adcs $acc3,$acc3,$t3 + ldp $t2,$t3,[$tp,#8*6] + adcs $acc4,$acc4,$t0 + adcs $acc5,$acc5,$t1 + adcs $acc6,$acc6,$t2 + adcs $acc7,$acc7,$t3 + //adc $carry,xzr,xzr // moved below + cbz $cnt,.Lsqr8x8_post_condition + + ldr $n0,[$tp,#-8*8] + ldp $a0,$a1,[$np,#8*0] + ldp $a2,$a3,[$np,#8*2] + ldp $a4,$a5,[$np,#8*4] + mov $cnt,#-8*8 + ldp $a6,$a7,[$np,#8*6] + add $np,$np,#8*8 + +.Lsqr8x_tail: + mul $t0,$a0,$n0 + adc $carry,xzr,xzr // carry bit, modulo-scheduled + mul $t1,$a1,$n0 + add $cnt,$cnt,#8 + mul $t2,$a2,$n0 + mul $t3,$a3,$n0 + adds $acc0,$acc0,$t0 + mul $t0,$a4,$n0 + adcs $acc1,$acc1,$t1 + mul $t1,$a5,$n0 + adcs $acc2,$acc2,$t2 + mul $t2,$a6,$n0 + adcs $acc3,$acc3,$t3 + mul $t3,$a7,$n0 + adcs $acc4,$acc4,$t0 + umulh $t0,$a0,$n0 + adcs $acc5,$acc5,$t1 + umulh $t1,$a1,$n0 + adcs $acc6,$acc6,$t2 + umulh $t2,$a2,$n0 + adcs $acc7,$acc7,$t3 + umulh $t3,$a3,$n0 + adc $carry,$carry,xzr + str $acc0,[$tp],#8 + adds $acc0,$acc1,$t0 + umulh $t0,$a4,$n0 + adcs $acc1,$acc2,$t1 + umulh $t1,$a5,$n0 + adcs $acc2,$acc3,$t2 + umulh $t2,$a6,$n0 + adcs $acc3,$acc4,$t3 + umulh $t3,$a7,$n0 + ldr $n0,[$rp,$cnt] + adcs $acc4,$acc5,$t0 + adcs $acc5,$acc6,$t1 + adcs $acc6,$acc7,$t2 + adcs $acc7,$carry,$t3 + //adc $carry,xzr,xzr // moved above + cbnz $cnt,.Lsqr8x_tail + // note that carry flag is guaranteed + // to be zero at this point + ldp $a0,$a1,[$tp,#8*0] + sub $cnt,$np_end,$np // done yet? + sub $t2,$np_end,$num // rewinded np + ldp $a2,$a3,[$tp,#8*2] + ldp $a4,$a5,[$tp,#8*4] + ldp $a6,$a7,[$tp,#8*6] + cbz $cnt,.Lsqr8x_tail_break + + ldr $n0,[$rp,#-8*8] + adds $acc0,$acc0,$a0 + adcs $acc1,$acc1,$a1 + ldp $a0,$a1,[$np,#8*0] + adcs $acc2,$acc2,$a2 + adcs $acc3,$acc3,$a3 + ldp $a2,$a3,[$np,#8*2] + adcs $acc4,$acc4,$a4 + adcs $acc5,$acc5,$a5 + ldp $a4,$a5,[$np,#8*4] + adcs $acc6,$acc6,$a6 + mov $cnt,#-8*8 + adcs $acc7,$acc7,$a7 + ldp $a6,$a7,[$np,#8*6] + add $np,$np,#8*8 + //adc $carry,xzr,xzr // moved above + b .Lsqr8x_tail + +.align 4 +.Lsqr8x_tail_break: + ldr $n0,[x29,#112] // pull n0 + add $cnt,$tp,#8*8 // end of current t[num] window + + subs xzr,$topmost,#1 // "move" top-most carry to carry bit + adcs $t0,$acc0,$a0 + adcs $t1,$acc1,$a1 + ldp $acc0,$acc1,[$rp,#8*0] + adcs $acc2,$acc2,$a2 + ldp $a0,$a1,[$t2,#8*0] // recall that $t2 is &n[0] + adcs $acc3,$acc3,$a3 + ldp $a2,$a3,[$t2,#8*2] + adcs $acc4,$acc4,$a4 + adcs $acc5,$acc5,$a5 + ldp $a4,$a5,[$t2,#8*4] + adcs $acc6,$acc6,$a6 + adcs $acc7,$acc7,$a7 + ldp $a6,$a7,[$t2,#8*6] + add $np,$t2,#8*8 + adc $topmost,xzr,xzr // top-most carry + mul $na0,$n0,$acc0 + stp $t0,$t1,[$tp,#8*0] + stp $acc2,$acc3,[$tp,#8*2] + ldp $acc2,$acc3,[$rp,#8*2] + stp $acc4,$acc5,[$tp,#8*4] + ldp $acc4,$acc5,[$rp,#8*4] + cmp $cnt,x29 // did we hit the bottom? + stp $acc6,$acc7,[$tp,#8*6] + mov $tp,$rp // slide the window + ldp $acc6,$acc7,[$rp,#8*6] + mov $cnt,#8 + b.ne .Lsqr8x_reduction + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + ldr $rp,[x29,#96] // pull rp + add $tp,$tp,#8*8 + subs $t0,$acc0,$a0 + sbcs $t1,$acc1,$a1 + sub $cnt,$num,#8*8 + mov $ap_end,$rp // $rp copy + +.Lsqr8x_sub: + sbcs $t2,$acc2,$a2 + ldp $a0,$a1,[$np,#8*0] + sbcs $t3,$acc3,$a3 + stp $t0,$t1,[$rp,#8*0] + sbcs $t0,$acc4,$a4 + ldp $a2,$a3,[$np,#8*2] + sbcs $t1,$acc5,$a5 + stp $t2,$t3,[$rp,#8*2] + sbcs $t2,$acc6,$a6 + ldp $a4,$a5,[$np,#8*4] + sbcs $t3,$acc7,$a7 + ldp $a6,$a7,[$np,#8*6] + add $np,$np,#8*8 + ldp $acc0,$acc1,[$tp,#8*0] + sub $cnt,$cnt,#8*8 + ldp $acc2,$acc3,[$tp,#8*2] + ldp $acc4,$acc5,[$tp,#8*4] + ldp $acc6,$acc7,[$tp,#8*6] + add $tp,$tp,#8*8 + stp $t0,$t1,[$rp,#8*4] + sbcs $t0,$acc0,$a0 + stp $t2,$t3,[$rp,#8*6] + add $rp,$rp,#8*8 + sbcs $t1,$acc1,$a1 + cbnz $cnt,.Lsqr8x_sub + + sbcs $t2,$acc2,$a2 + mov $tp,sp + add $ap,sp,$num + ldp $a0,$a1,[$ap_end,#8*0] + sbcs $t3,$acc3,$a3 + stp $t0,$t1,[$rp,#8*0] + sbcs $t0,$acc4,$a4 + ldp $a2,$a3,[$ap_end,#8*2] + sbcs $t1,$acc5,$a5 + stp $t2,$t3,[$rp,#8*2] + sbcs $t2,$acc6,$a6 + ldp $acc0,$acc1,[$ap,#8*0] + sbcs $t3,$acc7,$a7 + ldp $acc2,$acc3,[$ap,#8*2] + sbcs xzr,$topmost,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + stp $t0,$t1,[$rp,#8*4] + stp $t2,$t3,[$rp,#8*6] + + sub $cnt,$num,#8*4 +.Lsqr4x_cond_copy: + sub $cnt,$cnt,#8*4 + csel $t0,$acc0,$a0,lo + stp xzr,xzr,[$tp,#8*0] + csel $t1,$acc1,$a1,lo + ldp $a0,$a1,[$ap_end,#8*4] + ldp $acc0,$acc1,[$ap,#8*4] + csel $t2,$acc2,$a2,lo + stp xzr,xzr,[$tp,#8*2] + add $tp,$tp,#8*4 + csel $t3,$acc3,$a3,lo + ldp $a2,$a3,[$ap_end,#8*6] + ldp $acc2,$acc3,[$ap,#8*6] + add $ap,$ap,#8*4 + stp $t0,$t1,[$ap_end,#8*0] + stp $t2,$t3,[$ap_end,#8*2] + add $ap_end,$ap_end,#8*4 + stp xzr,xzr,[$ap,#8*0] + stp xzr,xzr,[$ap,#8*2] + cbnz $cnt,.Lsqr4x_cond_copy + + csel $t0,$acc0,$a0,lo + stp xzr,xzr,[$tp,#8*0] + csel $t1,$acc1,$a1,lo + stp xzr,xzr,[$tp,#8*2] + csel $t2,$acc2,$a2,lo + csel $t3,$acc3,$a3,lo + stp $t0,$t1,[$ap_end,#8*0] + stp $t2,$t3,[$ap_end,#8*2] + + b .Lsqr8x_done + +.align 4 +.Lsqr8x8_post_condition: + adc $carry,xzr,xzr + ldr x30,[x29,#8] // pull return address + // $acc0-7,$carry hold result, $a0-7 hold modulus + subs $a0,$acc0,$a0 + ldr $ap,[x29,#96] // pull rp + sbcs $a1,$acc1,$a1 + stp xzr,xzr,[sp,#8*0] + sbcs $a2,$acc2,$a2 + stp xzr,xzr,[sp,#8*2] + sbcs $a3,$acc3,$a3 + stp xzr,xzr,[sp,#8*4] + sbcs $a4,$acc4,$a4 + stp xzr,xzr,[sp,#8*6] + sbcs $a5,$acc5,$a5 + stp xzr,xzr,[sp,#8*8] + sbcs $a6,$acc6,$a6 + stp xzr,xzr,[sp,#8*10] + sbcs $a7,$acc7,$a7 + stp xzr,xzr,[sp,#8*12] + sbcs $carry,$carry,xzr // did it borrow? + stp xzr,xzr,[sp,#8*14] + + // $a0-7 hold result-modulus + csel $a0,$acc0,$a0,lo + csel $a1,$acc1,$a1,lo + csel $a2,$acc2,$a2,lo + csel $a3,$acc3,$a3,lo + stp $a0,$a1,[$ap,#8*0] + csel $a4,$acc4,$a4,lo + csel $a5,$acc5,$a5,lo + stp $a2,$a3,[$ap,#8*2] + csel $a6,$acc6,$a6,lo + csel $a7,$acc7,$a7,lo + stp $a4,$a5,[$ap,#8*4] + stp $a6,$a7,[$ap,#8*6] + +.Lsqr8x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + ret +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +___ +} + +{ +######################################################################## +# Even though this might look as ARMv8 adaptation of mulx4x_mont from +# x86_64-mont5 module, it's different in sense that it performs +# reduction 256 bits at a time. + +my ($a0,$a1,$a2,$a3, + $t0,$t1,$t2,$t3, + $m0,$m1,$m2,$m3, + $acc0,$acc1,$acc2,$acc3,$acc4, + $bi,$mi,$tp,$ap_end,$cnt) = map("x$_",(6..17,19..28)); +my $bp_end=$rp; +my ($carry,$topmost) = ($rp,"x30"); + +$code.=<<___; +.type __bn_mul4x_mont,%function +.align 5 +__bn_mul4x_mont: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + sub $tp,sp,$num,lsl#3 + lsl $num,$num,#3 + ldr $n0,[$n0] // *n0 + sub sp,$tp,#8*4 // alloca + + add $t0,$bp,$num + add $ap_end,$ap,$num + stp $rp,$t0,[x29,#96] // offload rp and &b[num] + + ldr $bi,[$bp,#8*0] // b[0] + ldp $a0,$a1,[$ap,#8*0] // a[0..3] + ldp $a2,$a3,[$ap,#8*2] + add $ap,$ap,#8*4 + mov $acc0,xzr + mov $acc1,xzr + mov $acc2,xzr + mov $acc3,xzr + ldp $m0,$m1,[$np,#8*0] // n[0..3] + ldp $m2,$m3,[$np,#8*2] + adds $np,$np,#8*4 // clear carry bit + mov $carry,xzr + mov $cnt,#0 + mov $tp,sp + +.Loop_mul4x_1st_reduction: + mul $t0,$a0,$bi // lo(a[0..3]*b[0]) + adc $carry,$carry,xzr // modulo-scheduled + mul $t1,$a1,$bi + add $cnt,$cnt,#8 + mul $t2,$a2,$bi + and $cnt,$cnt,#31 + mul $t3,$a3,$bi + adds $acc0,$acc0,$t0 + umulh $t0,$a0,$bi // hi(a[0..3]*b[0]) + adcs $acc1,$acc1,$t1 + mul $mi,$acc0,$n0 // t[0]*n0 + adcs $acc2,$acc2,$t2 + umulh $t1,$a1,$bi + adcs $acc3,$acc3,$t3 + umulh $t2,$a2,$bi + adc $acc4,xzr,xzr + umulh $t3,$a3,$bi + ldr $bi,[$bp,$cnt] // next b[i] (or b[0]) + adds $acc1,$acc1,$t0 + // (*) mul $t0,$m0,$mi // lo(n[0..3]*t[0]*n0) + str $mi,[$tp],#8 // put aside t[0]*n0 for tail processing + adcs $acc2,$acc2,$t1 + mul $t1,$m1,$mi + adcs $acc3,$acc3,$t2 + mul $t2,$m2,$mi + adc $acc4,$acc4,$t3 // can't overflow + mul $t3,$m3,$mi + // (*) adds xzr,$acc0,$t0 + subs xzr,$acc0,#1 // (*) + umulh $t0,$m0,$mi // hi(n[0..3]*t[0]*n0) + adcs $acc0,$acc1,$t1 + umulh $t1,$m1,$mi + adcs $acc1,$acc2,$t2 + umulh $t2,$m2,$mi + adcs $acc2,$acc3,$t3 + umulh $t3,$m3,$mi + adcs $acc3,$acc4,$carry + adc $carry,xzr,xzr + adds $acc0,$acc0,$t0 + sub $t0,$ap_end,$ap + adcs $acc1,$acc1,$t1 + adcs $acc2,$acc2,$t2 + adcs $acc3,$acc3,$t3 + //adc $carry,$carry,xzr + cbnz $cnt,.Loop_mul4x_1st_reduction + + cbz $t0,.Lmul4x4_post_condition + + ldp $a0,$a1,[$ap,#8*0] // a[4..7] + ldp $a2,$a3,[$ap,#8*2] + add $ap,$ap,#8*4 + ldr $mi,[sp] // a[0]*n0 + ldp $m0,$m1,[$np,#8*0] // n[4..7] + ldp $m2,$m3,[$np,#8*2] + add $np,$np,#8*4 + +.Loop_mul4x_1st_tail: + mul $t0,$a0,$bi // lo(a[4..7]*b[i]) + adc $carry,$carry,xzr // modulo-scheduled + mul $t1,$a1,$bi + add $cnt,$cnt,#8 + mul $t2,$a2,$bi + and $cnt,$cnt,#31 + mul $t3,$a3,$bi + adds $acc0,$acc0,$t0 + umulh $t0,$a0,$bi // hi(a[4..7]*b[i]) + adcs $acc1,$acc1,$t1 + umulh $t1,$a1,$bi + adcs $acc2,$acc2,$t2 + umulh $t2,$a2,$bi + adcs $acc3,$acc3,$t3 + umulh $t3,$a3,$bi + adc $acc4,xzr,xzr + ldr $bi,[$bp,$cnt] // next b[i] (or b[0]) + adds $acc1,$acc1,$t0 + mul $t0,$m0,$mi // lo(n[4..7]*a[0]*n0) + adcs $acc2,$acc2,$t1 + mul $t1,$m1,$mi + adcs $acc3,$acc3,$t2 + mul $t2,$m2,$mi + adc $acc4,$acc4,$t3 // can't overflow + mul $t3,$m3,$mi + adds $acc0,$acc0,$t0 + umulh $t0,$m0,$mi // hi(n[4..7]*a[0]*n0) + adcs $acc1,$acc1,$t1 + umulh $t1,$m1,$mi + adcs $acc2,$acc2,$t2 + umulh $t2,$m2,$mi + adcs $acc3,$acc3,$t3 + adcs $acc4,$acc4,$carry + umulh $t3,$m3,$mi + adc $carry,xzr,xzr + ldr $mi,[sp,$cnt] // next t[0]*n0 + str $acc0,[$tp],#8 // result!!! + adds $acc0,$acc1,$t0 + sub $t0,$ap_end,$ap // done yet? + adcs $acc1,$acc2,$t1 + adcs $acc2,$acc3,$t2 + adcs $acc3,$acc4,$t3 + //adc $carry,$carry,xzr + cbnz $cnt,.Loop_mul4x_1st_tail + + sub $t1,$ap_end,$num // rewinded $ap + cbz $t0,.Lmul4x_proceed + + ldp $a0,$a1,[$ap,#8*0] + ldp $a2,$a3,[$ap,#8*2] + add $ap,$ap,#8*4 + ldp $m0,$m1,[$np,#8*0] + ldp $m2,$m3,[$np,#8*2] + add $np,$np,#8*4 + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + ldr $bi,[$bp,#8*4]! // *++b + adc $topmost,$carry,xzr + ldp $a0,$a1,[$t1,#8*0] // a[0..3] + sub $np,$np,$num // rewind np + ldp $a2,$a3,[$t1,#8*2] + add $ap,$t1,#8*4 + + stp $acc0,$acc1,[$tp,#8*0] // result!!! + ldp $acc0,$acc1,[sp,#8*4] // t[0..3] + stp $acc2,$acc3,[$tp,#8*2] // result!!! + ldp $acc2,$acc3,[sp,#8*6] + + ldp $m0,$m1,[$np,#8*0] // n[0..3] + mov $tp,sp + ldp $m2,$m3,[$np,#8*2] + adds $np,$np,#8*4 // clear carry bit + mov $carry,xzr + +.align 4 +.Loop_mul4x_reduction: + mul $t0,$a0,$bi // lo(a[0..3]*b[4]) + adc $carry,$carry,xzr // modulo-scheduled + mul $t1,$a1,$bi + add $cnt,$cnt,#8 + mul $t2,$a2,$bi + and $cnt,$cnt,#31 + mul $t3,$a3,$bi + adds $acc0,$acc0,$t0 + umulh $t0,$a0,$bi // hi(a[0..3]*b[4]) + adcs $acc1,$acc1,$t1 + mul $mi,$acc0,$n0 // t[0]*n0 + adcs $acc2,$acc2,$t2 + umulh $t1,$a1,$bi + adcs $acc3,$acc3,$t3 + umulh $t2,$a2,$bi + adc $acc4,xzr,xzr + umulh $t3,$a3,$bi + ldr $bi,[$bp,$cnt] // next b[i] + adds $acc1,$acc1,$t0 + // (*) mul $t0,$m0,$mi + str $mi,[$tp],#8 // put aside t[0]*n0 for tail processing + adcs $acc2,$acc2,$t1 + mul $t1,$m1,$mi // lo(n[0..3]*t[0]*n0 + adcs $acc3,$acc3,$t2 + mul $t2,$m2,$mi + adc $acc4,$acc4,$t3 // can't overflow + mul $t3,$m3,$mi + // (*) adds xzr,$acc0,$t0 + subs xzr,$acc0,#1 // (*) + umulh $t0,$m0,$mi // hi(n[0..3]*t[0]*n0 + adcs $acc0,$acc1,$t1 + umulh $t1,$m1,$mi + adcs $acc1,$acc2,$t2 + umulh $t2,$m2,$mi + adcs $acc2,$acc3,$t3 + umulh $t3,$m3,$mi + adcs $acc3,$acc4,$carry + adc $carry,xzr,xzr + adds $acc0,$acc0,$t0 + adcs $acc1,$acc1,$t1 + adcs $acc2,$acc2,$t2 + adcs $acc3,$acc3,$t3 + //adc $carry,$carry,xzr + cbnz $cnt,.Loop_mul4x_reduction + + adc $carry,$carry,xzr + ldp $t0,$t1,[$tp,#8*4] // t[4..7] + ldp $t2,$t3,[$tp,#8*6] + ldp $a0,$a1,[$ap,#8*0] // a[4..7] + ldp $a2,$a3,[$ap,#8*2] + add $ap,$ap,#8*4 + adds $acc0,$acc0,$t0 + adcs $acc1,$acc1,$t1 + adcs $acc2,$acc2,$t2 + adcs $acc3,$acc3,$t3 + //adc $carry,$carry,xzr + + ldr $mi,[sp] // t[0]*n0 + ldp $m0,$m1,[$np,#8*0] // n[4..7] + ldp $m2,$m3,[$np,#8*2] + add $np,$np,#8*4 + +.align 4 +.Loop_mul4x_tail: + mul $t0,$a0,$bi // lo(a[4..7]*b[4]) + adc $carry,$carry,xzr // modulo-scheduled + mul $t1,$a1,$bi + add $cnt,$cnt,#8 + mul $t2,$a2,$bi + and $cnt,$cnt,#31 + mul $t3,$a3,$bi + adds $acc0,$acc0,$t0 + umulh $t0,$a0,$bi // hi(a[4..7]*b[4]) + adcs $acc1,$acc1,$t1 + umulh $t1,$a1,$bi + adcs $acc2,$acc2,$t2 + umulh $t2,$a2,$bi + adcs $acc3,$acc3,$t3 + umulh $t3,$a3,$bi + adc $acc4,xzr,xzr + ldr $bi,[$bp,$cnt] // next b[i] + adds $acc1,$acc1,$t0 + mul $t0,$m0,$mi // lo(n[4..7]*t[0]*n0) + adcs $acc2,$acc2,$t1 + mul $t1,$m1,$mi + adcs $acc3,$acc3,$t2 + mul $t2,$m2,$mi + adc $acc4,$acc4,$t3 // can't overflow + mul $t3,$m3,$mi + adds $acc0,$acc0,$t0 + umulh $t0,$m0,$mi // hi(n[4..7]*t[0]*n0) + adcs $acc1,$acc1,$t1 + umulh $t1,$m1,$mi + adcs $acc2,$acc2,$t2 + umulh $t2,$m2,$mi + adcs $acc3,$acc3,$t3 + umulh $t3,$m3,$mi + adcs $acc4,$acc4,$carry + ldr $mi,[sp,$cnt] // next a[0]*n0 + adc $carry,xzr,xzr + str $acc0,[$tp],#8 // result!!! + adds $acc0,$acc1,$t0 + sub $t0,$ap_end,$ap // done yet? + adcs $acc1,$acc2,$t1 + adcs $acc2,$acc3,$t2 + adcs $acc3,$acc4,$t3 + //adc $carry,$carry,xzr + cbnz $cnt,.Loop_mul4x_tail + + sub $t1,$np,$num // rewinded np? + adc $carry,$carry,xzr + cbz $t0,.Loop_mul4x_break + + ldp $t0,$t1,[$tp,#8*4] + ldp $t2,$t3,[$tp,#8*6] + ldp $a0,$a1,[$ap,#8*0] + ldp $a2,$a3,[$ap,#8*2] + add $ap,$ap,#8*4 + adds $acc0,$acc0,$t0 + adcs $acc1,$acc1,$t1 + adcs $acc2,$acc2,$t2 + adcs $acc3,$acc3,$t3 + //adc $carry,$carry,xzr + ldp $m0,$m1,[$np,#8*0] + ldp $m2,$m3,[$np,#8*2] + add $np,$np,#8*4 + b .Loop_mul4x_tail + +.align 4 +.Loop_mul4x_break: + ldp $t2,$t3,[x29,#96] // pull rp and &b[num] + adds $acc0,$acc0,$topmost + add $bp,$bp,#8*4 // bp++ + adcs $acc1,$acc1,xzr + sub $ap,$ap,$num // rewind ap + adcs $acc2,$acc2,xzr + stp $acc0,$acc1,[$tp,#8*0] // result!!! + adcs $acc3,$acc3,xzr + ldp $acc0,$acc1,[sp,#8*4] // t[0..3] + adc $topmost,$carry,xzr + stp $acc2,$acc3,[$tp,#8*2] // result!!! + cmp $bp,$t3 // done yet? + ldp $acc2,$acc3,[sp,#8*6] + ldp $m0,$m1,[$t1,#8*0] // n[0..3] + ldp $m2,$m3,[$t1,#8*2] + add $np,$t1,#8*4 + b.eq .Lmul4x_post + + ldr $bi,[$bp] + ldp $a0,$a1,[$ap,#8*0] // a[0..3] + ldp $a2,$a3,[$ap,#8*2] + adds $ap,$ap,#8*4 // clear carry bit + mov $carry,xzr + mov $tp,sp + b .Loop_mul4x_reduction + +.align 4 +.Lmul4x_post: + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + mov $rp,$t2 + mov $ap_end,$t2 // $rp copy + subs $t0,$acc0,$m0 + add $tp,sp,#8*8 + sbcs $t1,$acc1,$m1 + sub $cnt,$num,#8*4 + +.Lmul4x_sub: + sbcs $t2,$acc2,$m2 + ldp $m0,$m1,[$np,#8*0] + sub $cnt,$cnt,#8*4 + ldp $acc0,$acc1,[$tp,#8*0] + sbcs $t3,$acc3,$m3 + ldp $m2,$m3,[$np,#8*2] + add $np,$np,#8*4 + ldp $acc2,$acc3,[$tp,#8*2] + add $tp,$tp,#8*4 + stp $t0,$t1,[$rp,#8*0] + sbcs $t0,$acc0,$m0 + stp $t2,$t3,[$rp,#8*2] + add $rp,$rp,#8*4 + sbcs $t1,$acc1,$m1 + cbnz $cnt,.Lmul4x_sub + + sbcs $t2,$acc2,$m2 + mov $tp,sp + add $ap,sp,#8*4 + ldp $a0,$a1,[$ap_end,#8*0] + sbcs $t3,$acc3,$m3 + stp $t0,$t1,[$rp,#8*0] + ldp $a2,$a3,[$ap_end,#8*2] + stp $t2,$t3,[$rp,#8*2] + ldp $acc0,$acc1,[$ap,#8*0] + ldp $acc2,$acc3,[$ap,#8*2] + sbcs xzr,$topmost,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + + sub $cnt,$num,#8*4 +.Lmul4x_cond_copy: + sub $cnt,$cnt,#8*4 + csel $t0,$acc0,$a0,lo + stp xzr,xzr,[$tp,#8*0] + csel $t1,$acc1,$a1,lo + ldp $a0,$a1,[$ap_end,#8*4] + ldp $acc0,$acc1,[$ap,#8*4] + csel $t2,$acc2,$a2,lo + stp xzr,xzr,[$tp,#8*2] + add $tp,$tp,#8*4 + csel $t3,$acc3,$a3,lo + ldp $a2,$a3,[$ap_end,#8*6] + ldp $acc2,$acc3,[$ap,#8*6] + add $ap,$ap,#8*4 + stp $t0,$t1,[$ap_end,#8*0] + stp $t2,$t3,[$ap_end,#8*2] + add $ap_end,$ap_end,#8*4 + cbnz $cnt,.Lmul4x_cond_copy + + csel $t0,$acc0,$a0,lo + stp xzr,xzr,[$tp,#8*0] + csel $t1,$acc1,$a1,lo + stp xzr,xzr,[$tp,#8*2] + csel $t2,$acc2,$a2,lo + stp xzr,xzr,[$tp,#8*3] + csel $t3,$acc3,$a3,lo + stp xzr,xzr,[$tp,#8*4] + stp $t0,$t1,[$ap_end,#8*0] + stp $t2,$t3,[$ap_end,#8*2] + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + adc $carry,$carry,xzr + ldr $ap,[x29,#96] // pull rp + // $acc0-3,$carry hold result, $m0-7 hold modulus + subs $a0,$acc0,$m0 + ldr x30,[x29,#8] // pull return address + sbcs $a1,$acc1,$m1 + stp xzr,xzr,[sp,#8*0] + sbcs $a2,$acc2,$m2 + stp xzr,xzr,[sp,#8*2] + sbcs $a3,$acc3,$m3 + stp xzr,xzr,[sp,#8*4] + sbcs xzr,$carry,xzr // did it borrow? + stp xzr,xzr,[sp,#8*6] + + // $a0-3 hold result-modulus + csel $a0,$acc0,$a0,lo + csel $a1,$acc1,$a1,lo + csel $a2,$acc2,$a2,lo + csel $a3,$acc3,$a3,lo + stp $a0,$a1,[$ap,#8*0] + stp $a2,$a3,[$ap,#8*2] + +.Lmul4x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + ret +.size __bn_mul4x_mont,.-__bn_mul4x_mont +___ +} +$code.=<<___; +.asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 4 +___ + +print $code; + +close STDOUT; diff --git a/crypto/bn/asm/bn-586.pl b/crypto/bn/asm/bn-586.pl index 332ef3e91d62..58effc8808dd 100644 --- a/crypto/bn/asm/bn-586.pl +++ b/crypto/bn/asm/bn-586.pl @@ -1,10 +1,20 @@ -#!/usr/local/bin/perl +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],$0); +$output = pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0]); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -21,6 +31,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &asm_finish(); +close STDOUT; + sub bn_mul_add_words { local($name)=@_; @@ -42,7 +54,7 @@ sub bn_mul_add_words &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in &jmp(&label("maw_sse2_entry")); - + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] @@ -663,20 +675,20 @@ sub bn_sub_part_words &adc($c,0); &mov(&DWP($i*4,$r,"",0),$tmp1); # *r } - + &comment(""); &add($b,32); &add($r,32); &sub($num,8); &jnz(&label("pw_neg_loop")); - + &set_label("pw_neg_finish",0); &mov($tmp2,&wparam(4)); # get dl &mov($num,0); &sub($num,$tmp2); &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl<0 Tail Round $i"); @@ -693,9 +705,9 @@ sub bn_sub_part_words } &jmp(&label("pw_end")); - + &set_label("pw_pos",0); - + &and($num,0xfffffff8); # num / 8 &jz(&label("pw_pos_finish")); @@ -710,18 +722,18 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jnc(&label("pw_nc".$i)); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_pos_loop")); - + &set_label("pw_pos_finish",0); &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl>0 Tail Round $i"); @@ -742,17 +754,17 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &set_label("pw_nc".$i,0); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_nc_loop")); - + &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_nc_end")); - + for ($i=0; $i<7; $i++) { &mov($tmp1,&DWP($i*4,$a,"",0)); # *a @@ -771,4 +783,3 @@ sub bn_sub_part_words &function_end($name); } - diff --git a/crypto/bn/asm/bn-c64xplus.asm b/crypto/bn/asm/bn-c64xplus.asm new file mode 100644 index 000000000000..de6d37728fba --- /dev/null +++ b/crypto/bn/asm/bn-c64xplus.asm @@ -0,0 +1,382 @@ +;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +;; +;; Licensed under the OpenSSL license (the "License"). You may not use +;; this file except in compliance with the License. You can obtain a copy +;; in the file LICENSE in the source distribution or at +;; https://www.openssl.org/source/license.html +;; +;;==================================================================== +;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +;; project. +;; +;; Rights for redistribution and usage in source and binary forms are +;; granted according to the OpenSSL license. Warranty of any kind is +;; disclaimed. +;;==================================================================== +;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n +;; being the number of 32-bit words, addition - 8*n. Corresponding 4x +;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler +;; SPLOOPs spin at ... 2*n cycles [plus epilogue]. +;;==================================================================== + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg bn_mul_add_words,_bn_mul_add_words + .asg bn_mul_words,_bn_mul_words + .asg bn_sqr_words,_bn_sqr_words + .asg bn_add_words,_bn_add_words + .asg bn_sub_words,_bn_sub_words + .asg bn_div_words,_bn_div_words + .asg bn_sqr_comba8,_bn_sqr_comba8 + .asg bn_mul_comba8,_bn_mul_comba8 + .asg bn_sqr_comba4,_bn_sqr_comba4 + .asg bn_mul_comba4,_bn_mul_comba4 + .endif + + .asg B3,RA + .asg A4,ARG0 + .asg B4,ARG1 + .asg A6,ARG2 + .asg B6,ARG3 + .asg A8,ARG4 + .asg B8,ARG5 + .asg A4,RET + .asg A15,FP + .asg B14,DP + .asg B15,SP + + .global _bn_mul_add_words +_bn_mul_add_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A19 ; high part of accumulator +|| [B0] MV ARG0,A2 +|| [B0] MV ARG3,A3 + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,B7 ; ap[i] + NOP 3 + LDW *ARG0++,A7 ; rp[i] + MPY32U B7,A3,A17:A16 + NOP 3 ; [2,0] in epilogue + ADDU A16,A7,A21:A20 + ADDU A19,A21:A20,A19:A18 +|| MV.S A17,A23 + SPKERNEL 2,1 ; leave slot for "return value" +|| STW A18,*A2++ ; rp[i] +|| ADD A19,A23,A19 +;;==================================================================== + BNOP RA,4 + MV A19,RET ; return value + .endasmfunc + + .global _bn_mul_words +_bn_mul_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A19 ; high part of accumulator + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,A7 ; ap[i] + NOP 4 + MPY32U A7,ARG3,A17:A16 + NOP 4 ; [2,0] in epiloque + ADDU A19,A16,A19:A18 +|| MV.S A17,A21 + SPKERNEL 2,1 ; leave slot for "return value" +|| STW A18,*ARG0++ ; rp[i] +|| ADD.L A19,A21,A19 +;;==================================================================== + BNOP RA,4 + MV A19,RET ; return value + .endasmfunc + + .global _bn_sqr_words +_bn_sqr_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] MV ARG0,B2 +|| [B0] ADD 4,ARG0,ARG0 + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,B7 ; ap[i] + NOP 4 + MPY32U B7,B7,B1:B0 + NOP 3 ; [2,0] in epilogue + STW B0,*B2++(8) ; rp[2*i] + MV B1,A1 + SPKERNEL 2,0 ; fully overlap BNOP RA,5 +|| STW A1,*ARG0++(8) ; rp[2*i+1] +;;==================================================================== + BNOP RA,5 + .endasmfunc + + .global _bn_add_words +_bn_add_words: + .asmfunc + MV ARG3,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A1 ; carry flag +|| [B0] MV ARG0,A3 + NOP 3 + + SPLOOP 2 ; 2*n+6 +;;==================================================================== + LDW *ARG2++,A7 ; bp[i] +|| LDW *ARG1++,B7 ; ap[i] + NOP 4 + ADDU A7,B7,A9:A8 + ADDU A1,A9:A8,A1:A0 + SPKERNEL 0,0 ; fully overlap BNOP RA,5 +|| STW A0,*A3++ ; write result +|| MV A1,RET ; keep carry flag in RET +;;==================================================================== + BNOP RA,5 + .endasmfunc + + .global _bn_sub_words +_bn_sub_words: + .asmfunc + MV ARG3,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A2 ; borrow flag +|| [B0] MV ARG0,A3 + NOP 3 + + SPLOOP 2 ; 2*n+6 +;;==================================================================== + LDW *ARG2++,A7 ; bp[i] +|| LDW *ARG1++,B7 ; ap[i] + NOP 4 + SUBU B7,A7,A1:A0 + [A2] SUB A1:A0,1,A1:A0 + SPKERNEL 0,1 ; leave slot for "return borrow flag" +|| STW A0,*A3++ ; write result +|| AND 1,A1,A2 ; pass on borrow flag +;;==================================================================== + BNOP RA,4 + AND 1,A1,RET ; return borrow flag + .endasmfunc + + .global _bn_div_words +_bn_div_words: + .asmfunc + LMBD 1,A6,A0 ; leading zero bits in dv + LMBD 1,A4,A1 ; leading zero bits in hi +|| MVK 32,B0 + CMPLTU A1,A0,A2 +|| ADD A0,B0,B0 + [ A2] BNOP RA +||[ A2] MVK -1,A4 ; return overflow +||[!A2] MV A4,A3 ; reassign hi + [!A2] MV B4,A4 ; reassign lo, will be quotient +||[!A2] MVC B0,ILC + [!A2] SHL A6,A0,A6 ; normalize dv +|| MVK 1,A1 + + [!A2] CMPLTU A3,A6,A1 ; hi<dv? +||[!A2] SHL A4,1,A5:A4 ; lo<<1 + [!A1] SUB A3,A6,A3 ; hi-=dv +||[!A1] OR 1,A4,A4 + [!A2] SHRU A3,31,A1 ; upper bit +||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31 + + SPLOOP 3 + [!A1] CMPLTU A3,A6,A1 ; hi<dv? +||[ A1] ZERO A1 +|| SHL A4,1,A5:A4 ; lo<<1 + [!A1] SUB A3,A6,A3 ; hi-=dv +||[!A1] OR 1,A4,A4 ; quotient + SHRU A3,31,A1 ; upper bit +|| ADDAH A5,A3,A3 ; hi<<1|lo>>31 + SPKERNEL + + BNOP RA,5 + .endasmfunc + +;;==================================================================== +;; Not really Comba algorithm, just straightforward NxM... Dedicated +;; fully unrolled real Comba implementations are asymptotically 2x +;; faster, but naturally larger undertaking. Purpose of this exercise +;; was rather to learn to master nested SPLOOPs... +;;==================================================================== + .global _bn_sqr_comba8 + .global _bn_mul_comba8 +_bn_sqr_comba8: + MV ARG1,ARG2 +_bn_mul_comba8: + .asmfunc + MVK 8,B0 ; N, RILC +|| MVK 8,A0 ; M, outer loop counter +|| MV ARG1,A5 ; copy ap +|| MV ARG0,B4 ; copy rp +|| ZERO B19 ; high part of accumulator + MVC B0,RILC +|| SUB B0,2,B1 ; N-2, initial ILC +|| SUB B0,1,B2 ; const B2=N-1 +|| LDW *A5++,B6 ; ap[0] +|| MV A0,A3 ; const A3=M +sploopNxM?: ; for best performance arrange M<=N + [A0] SPLOOPD 2 ; 2*n+10 +|| MVC B1,ILC +|| ADDAW B4,B0,B5 +|| ZERO B7 +|| LDW *A5++,A9 ; pre-fetch ap[1] +|| ZERO A1 +|| SUB A0,1,A0 +;;==================================================================== +;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files. +;; This is because of Advisory 15 from TI publication SPRZ247I. + LDW *ARG2++,A7 ; bp[i] + NOP 3 + [A1] LDW *B5++,B7 ; rp[i] + MPY32U A7,B6,B17:B16 + NOP 3 + ADDU B16,B7,B21:B20 + ADDU B19,B21:B20,B19:B18 +|| MV.S B17,B23 + SPKERNEL +|| STW B18,*B4++ ; rp[i] +|| ADD.S B19,B23,B19 +;;==================================================================== +outer?: ; m*2*(n+1)+10 + SUBAW ARG2,A3,ARG2 ; rewind bp to bp[0] + SPMASKR +|| CMPGT A0,1,A2 ; done pre-fetching ap[i+1]? + MVD A9,B6 ; move through .M unit(*) + [A2] LDW *A5++,A9 ; pre-fetch ap[i+1] + SUBAW B5,B2,B5 ; rewind rp to rp[1] + MVK 1,A1 + [A0] BNOP.S1 outer?,4 +|| [A0] SUB.L A0,1,A0 + STW B19,*B4--[B2] ; rewind rp tp rp[1] +|| ZERO.S B19 ; high part of accumulator +;; end of outer? + BNOP RA,5 ; return + .endasmfunc +;; (*) It should be noted that B6 is used as input to MPY32U in +;; chronologically next cycle in *preceding* SPLOOP iteration. +;; Normally such arrangement would require DINT, but at this +;; point SPLOOP is draining and interrupts are disabled +;; implicitly. + + .global _bn_sqr_comba4 + .global _bn_mul_comba4 +_bn_sqr_comba4: + MV ARG1,ARG2 +_bn_mul_comba4: + .asmfunc + .if 0 + BNOP sploopNxM?,3 + ;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case, + ;; because of low-counter effect, when prologue phase finishes + ;; before SPKERNEL instruction is reached. As result it's 25% + ;; slower than expected... + MVK 4,B0 ; N, RILC +|| MVK 4,A0 ; M, outer loop counter +|| MV ARG1,A5 ; copy ap +|| MV ARG0,B4 ; copy rp +|| ZERO B19 ; high part of accumulator + MVC B0,RILC +|| SUB B0,2,B1 ; first ILC +|| SUB B0,1,B2 ; const B2=N-1 +|| LDW *A5++,B6 ; ap[0] +|| MV A0,A3 ; const A3=M + .else + ;; This alternative is an exercise in fully unrolled Comba + ;; algorithm implementation that operates at n*(n+1)+12, or + ;; as little as 32 cycles... + LDW *ARG1[0],B16 ; a[0] +|| LDW *ARG2[0],A16 ; b[0] + LDW *ARG1[1],B17 ; a[1] +|| LDW *ARG2[1],A17 ; b[1] + LDW *ARG1[2],B18 ; a[2] +|| LDW *ARG2[2],A18 ; b[2] + LDW *ARG1[3],B19 ; a[3] +|| LDW *ARG2[3],A19 ; b[3] + NOP + MPY32U A16,B16,A1:A0 ; a[0]*b[0] + MPY32U A17,B16,A23:A22 ; a[0]*b[1] + MPY32U A16,B17,A25:A24 ; a[1]*b[0] + MPY32U A16,B18,A27:A26 ; a[2]*b[0] + STW A0,*ARG0[0] +|| MPY32U A17,B17,A29:A28 ; a[1]*b[1] + MPY32U A18,B16,A31:A30 ; a[0]*b[2] +|| ADDU A22,A1,A1:A0 + MV A23,B0 +|| MPY32U A19,B16,A21:A20 ; a[3]*b[0] +|| ADDU A24,A1:A0,A1:A0 + ADDU A25,B0,B1:B0 +|| STW A0,*ARG0[1] +|| MPY32U A18,B17,A23:A22 ; a[2]*b[1] +|| ADDU A26,A1,A9:A8 + ADDU A27,B1,B9:B8 +|| MPY32U A17,B18,A25:A24 ; a[1]*b[2] +|| ADDU A28,A9:A8,A9:A8 + ADDU A29,B9:B8,B9:B8 +|| MPY32U A16,B19,A27:A26 ; a[0]*b[3] +|| ADDU A30,A9:A8,A9:A8 + ADDU A31,B9:B8,B9:B8 +|| ADDU B0,A9:A8,A9:A8 + STW A8,*ARG0[2] +|| ADDU A20,A9,A1:A0 + ADDU A21,B9,B1:B0 +|| MPY32U A19,B17,A21:A20 ; a[3]*b[1] +|| ADDU A22,A1:A0,A1:A0 + ADDU A23,B1:B0,B1:B0 +|| MPY32U A18,B18,A23:A22 ; a[2]*b[2] +|| ADDU A24,A1:A0,A1:A0 + ADDU A25,B1:B0,B1:B0 +|| MPY32U A17,B19,A25:A24 ; a[1]*b[3] +|| ADDU A26,A1:A0,A1:A0 + ADDU A27,B1:B0,B1:B0 +|| ADDU B8,A1:A0,A1:A0 + STW A0,*ARG0[3] +|| MPY32U A19,B18,A27:A26 ; a[3]*b[2] +|| ADDU A20,A1,A9:A8 + ADDU A21,B1,B9:B8 +|| MPY32U A18,B19,A29:A28 ; a[2]*b[3] +|| ADDU A22,A9:A8,A9:A8 + ADDU A23,B9:B8,B9:B8 +|| MPY32U A19,B19,A31:A30 ; a[3]*b[3] +|| ADDU A24,A9:A8,A9:A8 + ADDU A25,B9:B8,B9:B8 +|| ADDU B0,A9:A8,A9:A8 + STW A8,*ARG0[4] +|| ADDU A26,A9,A1:A0 + ADDU A27,B9,B1:B0 +|| ADDU A28,A1:A0,A1:A0 + ADDU A29,B1:B0,B1:B0 +|| BNOP RA +|| ADDU B8,A1:A0,A1:A0 + STW A0,*ARG0[5] +|| ADDU A30,A1,A9:A8 + ADD A31,B1,B8 + ADDU B0,A9:A8,A9:A8 ; removed || to avoid cross-path stall below + ADD B8,A9,A9 +|| STW A8,*ARG0[6] + STW A9,*ARG0[7] + .endif + .endasmfunc diff --git a/crypto/bn/asm/c64xplus-gf2m.pl b/crypto/bn/asm/c64xplus-gf2m.pl new file mode 100755 index 000000000000..9c46da3af8d1 --- /dev/null +++ b/crypto/bn/asm/c64xplus-gf2m.pl @@ -0,0 +1,160 @@ +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# February 2012 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication +# used in bn_gf2m.c. It's kind of low-hanging mechanical port from +# C for the time being... The subroutine runs in 37 cycles, which is +# 4.5x faster than compiler-generated code. Though comparison is +# totally unfair, because this module utilizes Galois Field Multiply +# instruction. + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector + +($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20)); +($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20)); +($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7"); +($A,$B)=($Alo,$B_1); +$xFF="B1"; + +sub mul_1x1_upper { +my ($A,$B)=@_; +$code.=<<___; + EXTU $B,8,24,$B_2 ; smash $B to 4 bytes +|| AND $B,$xFF,$B_0 +|| SHRU $B,24,$B_3 + SHRU $A,16, $Ahi ; smash $A to two halfwords +|| EXTU $A,16,16,$Alo + + XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication +|| XORMPY $Ahi,$B_2,$Ahix2 +|| EXTU $B,16,24,$B_1 + XORMPY $Alo,$B_0,$Alox0 +|| XORMPY $Ahi,$B_0,$Ahix0 + XORMPY $Alo,$B_3,$Alox3 +|| XORMPY $Ahi,$B_3,$Ahix3 + XORMPY $Alo,$B_1,$Alox1 +|| XORMPY $Ahi,$B_1,$Ahix1 +___ +} +sub mul_1x1_merged { +my ($OUTlo,$OUThi,$A,$B)=@_; +$code.=<<___; + EXTU $B,8,24,$B_2 ; smash $B to 4 bytes +|| AND $B,$xFF,$B_0 +|| SHRU $B,24,$B_3 + SHRU $A,16, $Ahi ; smash $A to two halfwords +|| EXTU $A,16,16,$Alo + + XOR $Ahix0,$Alox2,$Ahix0 +|| MV $Ahix2,$OUThi +|| XORMPY $Alo,$B_2,$Alox2 + XORMPY $Ahi,$B_2,$Ahix2 +|| EXTU $B,16,24,$B_1 +|| XORMPY $Alo,$B_0,A1 ; $Alox0 + XOR $Ahix1,$Alox3,$Ahix1 +|| SHL $Ahix0,16,$OUTlo +|| SHRU $Ahix0,16,$Ahix0 + XOR $Alox0,$OUTlo,$OUTlo +|| XOR $Ahix0,$OUThi,$OUThi +|| XORMPY $Ahi,$B_0,$Ahix0 +|| XORMPY $Alo,$B_3,$Alox3 +|| SHL $Alox1,8,$Alox1 +|| SHL $Ahix3,8,$Ahix3 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix3,$OUThi,$OUThi +|| XORMPY $Ahi,$B_3,$Ahix3 +|| SHL $Ahix1,24,$Alox1 +|| SHRU $Ahix1,8, $Ahix1 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix1,$OUThi,$OUThi +|| XORMPY $Alo,$B_1,$Alox1 +|| XORMPY $Ahi,$B_1,$Ahix1 +|| MV A1,$Alox0 +___ +} +sub mul_1x1_lower { +my ($OUTlo,$OUThi)=@_; +$code.=<<___; + ;NOP + XOR $Ahix0,$Alox2,$Ahix0 +|| MV $Ahix2,$OUThi + NOP + XOR $Ahix1,$Alox3,$Ahix1 +|| SHL $Ahix0,16,$OUTlo +|| SHRU $Ahix0,16,$Ahix0 + XOR $Alox0,$OUTlo,$OUTlo +|| XOR $Ahix0,$OUThi,$OUThi +|| SHL $Alox1,8,$Alox1 +|| SHL $Ahix3,8,$Ahix3 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix3,$OUThi,$OUThi +|| SHL $Ahix1,24,$Alox1 +|| SHRU $Ahix1,8, $Ahix1 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix1,$OUThi,$OUThi +___ +} +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2 + .endif + + .global _bn_GF2m_mul_2x2 +_bn_GF2m_mul_2x2: + .asmfunc + MVK 0xFF,$xFF +___ + &mul_1x1_upper($a0,$b0); # a0·b0 +$code.=<<___; +|| MV $b1,$B + MV $a1,$A +___ + &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1 +$code.=<<___; +|| XOR $b0,$b1,$B + XOR $a0,$a1,$A +___ + &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1) +$code.=<<___; + XOR A28,A31,A29 +|| XOR B28,B31,B29 ; a0·b0+a1·b1 +___ + &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1) +$code.=<<___; +|| BNOP B3 + XOR A29,A30,A30 +|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1 + XOR B28,A30,A30 +|| STW A28,*${rp}[0] + XOR B30,A31,A31 +|| STW A30,*${rp}[1] + STW A31,*${rp}[2] + STW B31,*${rp}[3] + .endasmfunc +___ + +print $code; +close STDOUT; diff --git a/crypto/bn/asm/co-586.pl b/crypto/bn/asm/co-586.pl index 57101a6bd775..97f5e3a19fc4 100644 --- a/crypto/bn/asm/co-586.pl +++ b/crypto/bn/asm/co-586.pl @@ -1,10 +1,19 @@ -#!/usr/local/bin/perl +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],$0); +$output = pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0]); &bn_mul_comba("bn_mul_comba8",8); &bn_mul_comba("bn_mul_comba4",4); @@ -13,6 +22,8 @@ require "x86asm.pl"; &asm_finish(); +close STDOUT; + sub mul_add_c { local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; @@ -36,7 +47,7 @@ sub mul_add_c &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a } @@ -65,7 +76,7 @@ sub sqr_add_c &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b } @@ -116,7 +127,7 @@ sub bn_mul_comba $c2="ebp"; $a="esi"; $b="edi"; - + $as=0; $ae=0; $bs=0; @@ -131,9 +142,9 @@ sub bn_mul_comba &push("ebx"); &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word + &mov("eax",&DWP(0,$a,"",0)); # load the first word &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second + &mov("edx",&DWP(0,$b,"",0)); # load the first second for ($i=0; $i<$tot; $i++) { @@ -141,7 +152,7 @@ sub bn_mul_comba $bi=$bs; $end=$be+1; - &comment("################## Calculate word $i"); + &comment("################## Calculate word $i"); for ($j=$bs; $j<$end; $j++) { diff --git a/crypto/bn/asm/ia64-mont.pl b/crypto/bn/asm/ia64-mont.pl index e258658428a3..ec486f77792b 100755 --- a/crypto/bn/asm/ia64-mont.pl +++ b/crypto/bn/asm/ia64-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -60,6 +67,8 @@ # hereafter less for longer keys, while verify - by 74-13%. # DSA performance improves by 115-30%. +$output=pop; + if ($^O eq "hpux") { $ADDP="addp4"; for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } @@ -71,7 +80,7 @@ $code=<<___; // int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap, // const BN_ULONG *bp,const BN_ULONG *np, -// const BN_ULONG *n0p,int num); +// const BN_ULONG *n0p,int num); .align 64 .global bn_mul_mont# .proc bn_mul_mont# @@ -194,7 +203,7 @@ bn_mul_mont_general: { .mmi; .pred.rel "mutex",p39,p41 (p39) add topbit=r0,r0 (p41) add topbit=r0,r0,1 - nop.i 0 } + nop.i 0 } { .mmi; st8 [tp_1]=n[0] add tptr=16,sp add tp_1=8,sp };; @@ -332,19 +341,19 @@ bn_mul_mont_general: { .mmb; sub rptr=rptr,len // rewind sub tptr=tptr,len clrrrb.pr };; -{ .mmi; and aptr=tptr,topbit - andcm bptr=rptr,topbit +{ .mmi; mov aptr=rptr + mov bptr=tptr mov pr.rot=1<<16 };; -{ .mii; or nptr=aptr,bptr +{ .mii; cmp.eq p0,p6=topbit,r0 mov ar.lc=lc - mov ar.ec=3 };; + mov ar.ec=2 };; .Lcopy_ctop: -{ .mmb; (p16) ld8 n[0]=[nptr],8 - (p18) st8 [tptr]=r0,8 - (p16) nop.b 0 } -{ .mmb; (p16) nop.m 0 - (p18) st8 [rptr]=n[2],8 +{ .mmi; (p16) ld8 a[0]=[aptr],8 + (p16) ld8 t[0]=[bptr],8 + (p6) mov a[1]=t[1] };; // (p17) +{ .mmb; (p17) st8 [rptr]=a[1],8 + (p17) st8 [tptr]=r0,8 br.ctop.sptk .Lcopy_ctop };; .Lcopy_cend: @@ -846,6 +855,6 @@ copyright: stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>" ___ -$output=shift and open STDOUT,">$output"; +open STDOUT,">$output" if $output; print $code; close STDOUT; diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S index a9a42abfc302..d235c45e2d63 100644 --- a/crypto/bn/asm/ia64.S +++ b/crypto/bn/asm/ia64.S @@ -1,11 +1,18 @@ .explicit .text .ident "ia64.S, Version 2.1" -.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>" + +// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html // // ==================================================================== -// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL // project. // // Rights for redistribution and usage in source and binary forms are @@ -13,7 +20,7 @@ // disclaimed. // ==================================================================== // -// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is +// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is // different from Itanium to this module viewpoint. Most notably, is it // "wider" than Itanium? Can you experience loop scalability as // discussed in commentary sections? Not really:-( Itanium2 has 6 @@ -22,7 +29,7 @@ // ports is the same, i.e. 2, while I need 4. In other words, to this // module Itanium2 remains effectively as "wide" as Itanium. Yet it's // essentially different in respect to this module, and a re-tune was -// required. Well, because some intruction latencies has changed. Most +// required. Well, because some instruction latencies has changed. Most // noticeably those intensively used: // // Itanium Itanium2 @@ -134,7 +141,7 @@ // User Mask I want to excuse the kernel from preserving upper // (f32-f128) FP register bank over process context switch, thus // minimizing bus bandwidth consumption during the switch (i.e. -// after PKI opration completes and the program is off doing +// after PKI operation completes and the program is off doing // something else like bulk symmetric encryption). Having said // this, I also want to point out that it might be good idea // to compile the whole toolkit (as well as majority of the @@ -150,12 +157,15 @@ #else #define ADDP add #endif +#ifdef __VMS +.alias abort, "decc$abort" +#endif #if 1 // // bn_[add|sub]_words routines. // -// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the +// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the // data reside in L1 cache, i.e. 2 ticks away). It's possible to // compress the epilogue and get down to 2*n+6, but at the cost of // scalability (the neat feature of this implementation is that it @@ -363,7 +373,7 @@ bn_mul_words: // The loop therefore spins at the latency of xma minus 1, or in other // words at 6*(n+4) ticks:-( Compare to the "production" loop above // that runs in 2*(n+11) where the low latency problem is worked around -// by moving the dependency to one-tick latent interger ALU. Note that +// by moving the dependency to one-tick latent integer ALU. Note that // "distance" between ldf8 and xma is not latency of ldf8, but the // *difference* between xma and ldf8 latencies. .L_bn_mul_words_ctop: @@ -425,7 +435,7 @@ bn_mul_add_words: // version was performing *all* additions in IALU and was starving // for those even on Itanium 2. In this version one addition is // moved to FPU and is folded with multiplication. This is at cost -// of propogating the result from previous call to this subroutine +// of propagating the result from previous call to this subroutine // to L2 cache... In other words negligible even for shorter keys. // *Overall* performance improvement [over previous version] varies // from 11 to 22 percent depending on key length. @@ -493,9 +503,9 @@ bn_sqr_words: // possible to compress the epilogue (I'm getting tired to write this // comment over and over) and get down to 2*n+16 at the cost of // scalability. The decision will very likely be reconsidered after the -// benchmark program is profiled. I.e. if perfomance gain on Itanium +// benchmark program is profiled. I.e. if performance gain on Itanium // will appear larger than loss on "wider" IA-64, then the loop should -// be explicitely split and the epilogue compressed. +// be explicitly split and the epilogue compressed. .L_bn_sqr_words_ctop: { .mfi; (p16) ldf8 f32=[r33],8 (p25) xmpy.lu f42=f41,f41 @@ -929,7 +939,7 @@ bn_mul_comba8: xma.hu f118=f39,f127,f117 } { .mfi; xma.lu f117=f39,f127,f117 };;// //-------------------------------------------------// -// Leaving muliplier's heaven... Quite a ride, huh? +// Leaving multiplier's heaven... Quite a ride, huh? { .mii; getf.sig r31=f47 add r25=r25,r24 @@ -1421,6 +1431,7 @@ bn_div_words: mov ar.ec=0 // don't rotate at exit mov pr.rot=0 } { .mii; mov L=r33 // save l + mov r25=r0 // needed if abort is called on VMS mov r36=r0 };; .L_divw_shift: // -vv- note signed comparison @@ -1522,9 +1533,8 @@ bn_div_words: // output: f8 = (int)(a/b) // clobbered: f8,f9,f10,f11,pred pred=p15 -// One can argue that this snippet is copyrighted to Intel -// Corporation, as it's essentially identical to one of those -// found in "Divide, Square Root and Remainder" section at +// This snippet is based on text found in the "Divide, Square +// Root and Remainder" section at // http://www.intel.com/software/products/opensource/libraries/num.htm. // Yes, I admit that the referred code was used as template, // but after I realized that there hardly is any other instruction diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl index a33cdf411121..fbe5d04f716c 100755 --- a/crypto/bn/asm/mips-mont.pl +++ b/crypto/bn/asm/mips-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -49,14 +56,14 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $REG_S="sd"; $REG_L="ld"; $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $REG_S="sw"; $REG_L="lw"; $SZREG=4; @@ -67,7 +74,7 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000; # ###################################################################### -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; if ($flavour =~ /64|n32/i) { @@ -114,6 +121,8 @@ $m1=$s11; $FRAMESIZE=14; $code=<<___; +#include "mips_arch.h" + .text .set noat @@ -176,27 +185,27 @@ $code.=<<___; $PTR_SUB $sp,$num and $sp,$at - $MULTU $aj,$bi - $LD $alo,$BNSZ($ap) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $MULTU $lo0,$n0 - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - $MULTU $nlo,$m1 + $MULTU ($aj,$bi) + $LD $ahi,$BNSZ($ap) + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) + $MULTU ($lo0,$n0) + mflo ($m1,$lo0,$n0) + + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) + + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -208,25 +217,25 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 - $MULTU $nj,$m1 + $MULTU ($nj,$m1) $ADDU $hi1,$at addu $j,$BNSZ $ST $lo1,($tp) sltu $t0,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) bnez $t0,.L1st $PTR_ADD $tp,$BNSZ @@ -256,34 +265,34 @@ $code.=<<___; $PTR_ADD $bi,$bp,$i $LD $bi,($bi) $LD $aj,($ap) - $LD $alo,$BNSZ($ap) + $LD $ahi,$BNSZ($ap) $LD $tj,($sp) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $LD $nj,($np) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) $ADDU $lo0,$tj - $MULTU $lo0,$n0 + $MULTU ($lo0,$n0) sltu $at,$lo0,$tj $ADDU $hi0,$at - mflo $m1 + mflo ($m1,$lo0,$n0) - $MULTU $alo,$bi - mflo $alo - mfhi $ahi + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) - $MULTU $nlo,$m1 + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -296,19 +305,19 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo0,$tj addu $j,$BNSZ - $MULTU $nj,$m1 + $MULTU ($nj,$m1) sltu $at,$lo0,$tj $ADDU $lo1,$lo0 $ADDU $hi0,$at @@ -316,8 +325,8 @@ $code.=<<___; $LD $tj,2*$BNSZ($tp) $ADDU $hi1,$t0 sltu $at,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) $ST $lo1,($tp) bnez $at,.Linner $PTR_ADD $tp,$BNSZ @@ -377,15 +386,13 @@ $code.=<<___; $PTR_SUB $rp,$num # restore rp not $hi1,$hi0 - and $ap,$hi0,$sp - and $bp,$hi1,$rp - or $ap,$ap,$bp # ap=borrow?tp:rp - -.align 4 -.Lcopy: $LD $aj,($ap) - $PTR_ADD $ap,$BNSZ +.Lcopy: $LD $nj,($tp) # conditional move + $LD $aj,($rp) $ST $zero,($tp) $PTR_ADD $tp,$BNSZ + and $nj,$hi0 + and $aj,$hi1 + or $aj,$nj sltu $at,$tp,$tj $ST $aj,($rp) bnez $at,.Lcopy diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index acafde5e5685..da35ec1b30ce 100755 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. # # Rights for redistribution and usage in source and binary forms are @@ -15,7 +22,7 @@ # This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. # # The module is designed to work with either of the "new" MIPS ABI(5), -# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under +# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under # IRIX 5.x not only because it doesn't support new ABIs but also # because 5.x kernels put R4x00 CPU into 32-bit mode and all those # 64-bit instructions (daddu, dmultu, etc.) found below gonna only @@ -35,7 +42,7 @@ # Performance improvement is astonishing! 'apps/openssl speed rsa dsa' # goes way over 3 times faster! # -# <appro@fy.chalmers.se> +# <appro@openssl.org> # October 2010 # @@ -49,7 +56,7 @@ # key length, more for longer keys. $flavour = shift || "o32"; -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; if ($flavour =~ /64|n32/i) { @@ -102,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i); $minus4=$v1; $code.=<<___; +#include "mips_arch.h" + +#if defined(_MIPS_ARCH_MIPS64R6) +# define ddivu(rs,rt) +# define mfqt(rd,rs,rt) ddivu rd,rs,rt +# define mfrm(rd,rs,rt) dmodu rd,rs,rt +#elif defined(_MIPS_ARCH_MIPS32R6) +# define divu(rs,rt) +# define mfqt(rd,rs,rt) divu rd,rs,rt +# define mfrm(rd,rs,rt) modu rd,rs,rt +#else +# define $DIVU(rs,rt) $DIVU $zero,rs,rt +# define mfqt(rd,rs,rt) mflo rd +# define mfrm(rd,rs,rt) mfhi rd +#endif + .rdata .asciiz "mips3.s, Version 1.2" .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" @@ -144,7 +167,7 @@ $code.=<<___; .L_bn_mul_add_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) $LD $t2,$BNSZ($a1) $LD $t3,$BNSZ($a0) @@ -154,11 +177,11 @@ $code.=<<___; sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit # values", but it seems to work fine # even on 64-bit registers. - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 - $MULTU $t2,$a3 + $MULTU ($t2,$a3) sltu $at,$t1,$at $ST $t1,0($a0) $ADDU $v0,$at @@ -167,11 +190,11 @@ $code.=<<___; $LD $ta3,3*$BNSZ($a0) $ADDU $t3,$v0 sltu $v0,$t3,$v0 - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $t3,$at $ADDU $v0,$t2 - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) sltu $at,$t3,$at $ST $t3,$BNSZ($a0) $ADDU $v0,$at @@ -181,11 +204,11 @@ $code.=<<___; $PTR_ADD $a1,4*$BNSZ $ADDU $ta1,$v0 sltu $v0,$ta1,$v0 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $ta1,$at $ADDU $v0,$ta0 - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) sltu $at,$ta1,$at $ST $ta1,-2*$BNSZ($a0) $ADDU $v0,$at @@ -194,8 +217,8 @@ $code.=<<___; and $ta0,$a2,$minus4 $ADDU $ta3,$v0 sltu $v0,$ta3,$v0 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $ta3,$at $ADDU $v0,$ta2 sltu $at,$ta3,$at @@ -210,13 +233,13 @@ $code.=<<___; .L_bn_mul_add_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -225,13 +248,13 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,$BNSZ($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -240,12 +263,12 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,2*$BNSZ($a0) $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -303,40 +326,40 @@ $code.=<<___; .L_bn_mul_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at - $MULTU $t2,$a3 + $MULTU ($t2,$a3) $ST $v0,0($a0) $ADDU $v0,$t1,$t0 subu $a2,4 $PTR_ADD $a0,4*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $v0,$at sltu $t3,$v0,$at - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) $ST $v0,-3*$BNSZ($a0) $ADDU $v0,$t3,$t2 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $v0,$at sltu $ta1,$v0,$at - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) $ST $v0,-2*$BNSZ($a0) $ADDU $v0,$ta1,$ta0 and $ta0,$a2,$minus4 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) @@ -350,10 +373,10 @@ $code.=<<___; .L_bn_mul_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,0($a0) @@ -361,10 +384,10 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,$BNSZ($a0) @@ -372,9 +395,9 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - mflo $at - mfhi $t0 + $MULTU ($t0,$a3) + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,2*$BNSZ($a0) @@ -431,35 +454,35 @@ $code.=<<___; .L_bn_sqr_words_loop: $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) - $MULTU $t2,$t2 + $MULTU ($t2,$t2) subu $a2,4 $PTR_ADD $a0,8*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $t3 - mfhi $t2 + mflo ($t3,$t2,$t2) + mfhi ($t2,$t2,$t2) $ST $t3,-6*$BNSZ($a0) $ST $t2,-5*$BNSZ($a0) - $MULTU $ta0,$ta0 - mflo $ta1 - mfhi $ta0 + $MULTU ($ta0,$ta0) + mflo ($ta1,$ta0,$ta0) + mfhi ($ta0,$ta0,$ta0) $ST $ta1,-4*$BNSZ($a0) $ST $ta0,-3*$BNSZ($a0) - $MULTU $ta2,$ta2 + $MULTU ($ta2,$ta2) and $ta0,$a2,$minus4 - mflo $ta3 - mfhi $ta2 + mflo ($ta3,$ta2,$ta2) + mfhi ($ta2,$ta2,$ta2) $ST $ta3,-2*$BNSZ($a0) .set noreorder @@ -472,27 +495,27 @@ $code.=<<___; .L_bn_sqr_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,2*$BNSZ($a0) $ST $t0,3*$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$t0 - mflo $t1 - mfhi $t0 + $MULTU ($t0,$t0) + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,4*$BNSZ($a0) $ST $t0,5*$BNSZ($a0) @@ -580,13 +603,13 @@ $code.=<<___; sltu $v0,$t2,$ta2 $ST $t2,-2*$BNSZ($a0) $ADDU $v0,$t8 - + $ADDU $ta3,$t3 sltu $t9,$ta3,$t3 $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - + .set noreorder bgtz $at,.L_bn_add_words_loop $ADDU $v0,$t9 @@ -785,7 +808,7 @@ bn_div_3_words: # so that we can save two arguments # and return address in registers # instead of stack:-) - + $LD $a0,($a3) move $ta2,$a1 bne $a0,$a2,bn_div_3_words_internal @@ -816,11 +839,11 @@ $code.=<<___; move $ta3,$ra bal bn_div_words_internal move $ra,$ta3 - $MULTU $ta2,$v0 + $MULTU ($ta2,$v0) $LD $t2,-2*$BNSZ($a3) move $ta0,$zero - mfhi $t1 - mflo $t0 + mfhi ($t1,$ta2,$v0) + mflo ($t0,$ta2,$v0) sltu $t8,$t1,$a1 .L_bn_div_3_words_inner_loop: bnez $t8,.L_bn_div_3_words_inner_loop_done @@ -923,15 +946,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div1 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div1: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop1: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -956,15 +979,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div2 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div2: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop2: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -1063,592 +1086,592 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) $LD $b_4,4*$BNSZ($a2) $LD $b_5,5*$BNSZ($a2) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at $LD $b_6,6*$BNSZ($a2) $LD $b_7,7*$BNSZ($a2) $ST $c_1,0($a0) # r[0]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) # r[1]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) # r[2]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); + $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) # r[3]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_0) + mfhi ($t_2,$a_4,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); + $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_4) + mfhi ($t_2,$a_0,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) # r[4]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_5) + mfhi ($t_2,$a_0,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); + $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_4) + mfhi ($t_2,$a_1,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); + $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_1) + mfhi ($t_2,$a_4,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); + $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_0) + mfhi ($t_2,$a_5,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); + $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) # r[5]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_0) + mfhi ($t_2,$a_6,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); + $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_1) + mfhi ($t_2,$a_5,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); + $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_2) + mfhi ($t_2,$a_4,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); + $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_4) + mfhi ($t_2,$a_2,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); + $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_5) + mfhi ($t_2,$a_1,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); + $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_6) + mfhi ($t_2,$a_0,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$b_7) # mul_add_c(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) # r[6]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_7) + mfhi ($t_2,$a_0,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); + $MULTU ($a_1,$b_6) # mul_add_c(a[1],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_6) + mfhi ($t_2,$a_1,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); + $MULTU ($a_2,$b_5) # mul_add_c(a[2],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_5) + mfhi ($t_2,$a_2,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); + $MULTU ($a_3,$b_4) # mul_add_c(a[3],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_4) + mfhi ($t_2,$a_3,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); + $MULTU ($a_4,$b_3) # mul_add_c(a[4],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_3) + mfhi ($t_2,$a_4,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); + $MULTU ($a_5,$b_2) # mul_add_c(a[5],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_2) + mfhi ($t_2,$a_5,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); + $MULTU ($a_6,$b_1) # mul_add_c(a[6],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_1) + mfhi ($t_2,$a_6,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); + $MULTU ($a_7,$b_0) # mul_add_c(a[7],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_0) + mfhi ($t_2,$a_7,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); + $MULTU ($a_7,$b_1) # mul_add_c(a[7],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,7*$BNSZ($a0) # r[7]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_1) + mfhi ($t_2,$a_7,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); + $MULTU ($a_6,$b_2) # mul_add_c(a[6],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_2) + mfhi ($t_2,$a_6,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); + $MULTU ($a_5,$b_3) # mul_add_c(a[5],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_3) + mfhi ($t_2,$a_5,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); + $MULTU ($a_4,$b_4) # mul_add_c(a[4],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_4) + mfhi ($t_2,$a_4,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); + $MULTU ($a_3,$b_5) # mul_add_c(a[3],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_5) + mfhi ($t_2,$a_3,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); + $MULTU ($a_2,$b_6) # mul_add_c(a[2],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_6) + mfhi ($t_2,$a_2,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); + $MULTU ($a_1,$b_7) # mul_add_c(a[1],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_7) + mfhi ($t_2,$a_1,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$b_7) # mul_add_c(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) # r[8]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_7) + mfhi ($t_2,$a_2,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); + $MULTU ($a_3,$b_6) # mul_add_c(a[3],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_6) + mfhi ($t_2,$a_3,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); + $MULTU ($a_4,$b_5) # mul_add_c(a[4],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_5) + mfhi ($t_2,$a_4,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); + $MULTU ($a_5,$b_4) # mul_add_c(a[5],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_4) + mfhi ($t_2,$a_5,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); + $MULTU ($a_6,$b_3) # mul_add_c(a[6],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_3) + mfhi ($t_2,$a_6,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); + $MULTU ($a_7,$b_2) # mul_add_c(a[7],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_2) + mfhi ($t_2,$a_7,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); + $MULTU ($a_7,$b_3) # mul_add_c(a[7],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,9*$BNSZ($a0) # r[9]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_3) + mfhi ($t_2,$a_7,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); + $MULTU ($a_6,$b_4) # mul_add_c(a[6],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_4) + mfhi ($t_2,$a_6,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); + $MULTU ($a_5,$b_5) # mul_add_c(a[5],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_5) + mfhi ($t_2,$a_5,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); + $MULTU ($a_4,$b_6) # mul_add_c(a[4],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_6) + mfhi ($t_2,$a_4,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); + $MULTU ($a_3,$b_7) # mul_add_c(a[3],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_7) + mfhi ($t_2,$a_3,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$b_7) # mul_add_c(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) # r[10]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_7) + mfhi ($t_2,$a_4,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); + $MULTU ($a_5,$b_6) # mul_add_c(a[5],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_6) + mfhi ($t_2,$a_5,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); + $MULTU ($a_6,$b_5) # mul_add_c(a[6],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_5) + mfhi ($t_2,$a_6,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); + $MULTU ($a_7,$b_4) # mul_add_c(a[7],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_4) + mfhi ($t_2,$a_7,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); + $MULTU ($a_7,$b_5) # mul_add_c(a[7],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,11*$BNSZ($a0) # r[11]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_5) + mfhi ($t_2,$a_7,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); + $MULTU ($a_6,$b_6) # mul_add_c(a[6],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_6) + mfhi ($t_2,$a_6,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); + $MULTU ($a_5,$b_7) # mul_add_c(a[5],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_7) + mfhi ($t_2,$a_5,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$b_7) # mul_add_c(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) # r[12]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_7) + mfhi ($t_2,$a_6,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); + $MULTU ($a_7,$b_6) # mul_add_c(a[7],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_6) + mfhi ($t_2,$a_7,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); + $MULTU ($a_7,$b_7) # mul_add_c(a[7],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,13*$BNSZ($a0) # r[13]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_7) + mfhi ($t_2,$a_7,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -1709,144 +1732,144 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $ST $c_1,0($a0) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at @@ -1881,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2, # commented as "forward multiplication" below]; )=@_; $code.=<<___; - mflo $lo - mfhi $hi $ADDU $c0,$lo sltu $at,$c0,$lo - $MULTU $an,$bn # forward multiplication + $MULTU ($an,$bn) # forward multiplication $ADDU $c0,$lo $ADDU $at,$hi sltu $lo,$c0,$lo @@ -1895,15 +1916,17 @@ ___ $code.=<<___ if (!$warm); sltu $c2,$c1,$at $ADDU $c1,$hi - sltu $hi,$c1,$hi - $ADDU $c2,$hi ___ $code.=<<___ if ($warm); sltu $at,$c1,$at $ADDU $c1,$hi $ADDU $c2,$at +___ +$code.=<<___; sltu $hi,$c1,$hi $ADDU $c2,$hi + mflo ($lo,$an,$bn) + mfhi ($hi,$an,$bn) ___ } @@ -1933,21 +1956,21 @@ $code.=<<___; $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -1955,20 +1978,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); @@ -1982,16 +2007,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$a_5) # mul_add_c2(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_0,$a_5) + mfhi ($t_2,$a_0,$a_5) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); @@ -2009,16 +2034,16 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$a_7) # mul_add_c2(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) + mflo ($t_1,$a_0,$a_7) + mfhi ($t_2,$a_0,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); @@ -2038,16 +2063,16 @@ ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$a_7) # mul_add_c2(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) + mflo ($t_1,$a_2,$a_7) + mfhi ($t_2,$a_2,$a_7) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); @@ -2063,16 +2088,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$a_7) # mul_add_c2(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) + mflo ($t_1,$a_4,$a_7) + mfhi ($t_2,$a_4,$a_7) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); @@ -2084,24 +2109,22 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$a_7) # mul_add_c2(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) + mflo ($t_1,$a_6,$a_7) + mfhi ($t_2,$a_6,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); $code.=<<___; $ST $c_2,13*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -2145,19 +2168,19 @@ $code.=<<___; .set reorder $LD $a_0,0($a1) $LD $a_1,$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -2165,20 +2188,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); @@ -2190,24 +2215,22 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$a_3) # mul_add_c2(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_2,$a_3) + mfhi ($t_2,$a_2,$a_3) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at diff --git a/crypto/bn/asm/mips3-mont.pl b/crypto/bn/asm/mips3-mont.pl deleted file mode 100755 index 8f9156e02af3..000000000000 --- a/crypto/bn/asm/mips3-mont.pl +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# This module doesn't present direct interest for OpenSSL, because it -# doesn't provide better performance for longer keys. While 512-bit -# RSA private key operations are 40% faster, 1024-bit ones are hardly -# faster at all, while longer key operations are slower by up to 20%. -# It might be of interest to embedded system developers though, as -# it's smaller than 1KB, yet offers ~3x improvement over compiler -# generated code. -# -# The module targets N32 and N64 MIPS ABIs and currently is a bit -# IRIX-centric, i.e. is likely to require adaptation for other OSes. - -# int bn_mul_mont( -$rp="a0"; # BN_ULONG *rp, -$ap="a1"; # const BN_ULONG *ap, -$bp="a2"; # const BN_ULONG *bp, -$np="a3"; # const BN_ULONG *np, -$n0="a4"; # const BN_ULONG *n0, -$num="a5"; # int num); - -$lo0="a6"; -$hi0="a7"; -$lo1="v0"; -$hi1="v1"; -$aj="t0"; -$bi="t1"; -$nj="t2"; -$tp="t3"; -$alo="s0"; -$ahi="s1"; -$nlo="s2"; -$nhi="s3"; -$tj="s4"; -$i="s5"; -$j="s6"; -$fp="t8"; -$m1="t9"; - -$FRAME=8*(2+8); - -$code=<<___; -#include <asm.h> -#include <regdef.h> - -.text - -.set noat -.set reorder - -.align 5 -.globl bn_mul_mont -.ent bn_mul_mont -bn_mul_mont: - .set noreorder - PTR_SUB sp,64 - move $fp,sp - .frame $fp,64,ra - slt AT,$num,4 - li v0,0 - beqzl AT,.Lproceed - nop - jr ra - PTR_ADD sp,$fp,64 - .set reorder -.align 5 -.Lproceed: - ld $n0,0($n0) - ld $bi,0($bp) # bp[0] - ld $aj,0($ap) # ap[0] - ld $nj,0($np) # np[0] - PTR_SUB sp,16 # place for two extra words - sll $num,3 - li AT,-4096 - PTR_SUB sp,$num - and sp,AT - - sd s0,0($fp) - sd s1,8($fp) - sd s2,16($fp) - sd s3,24($fp) - sd s4,32($fp) - sd s5,40($fp) - sd s6,48($fp) - sd s7,56($fp) - - dmultu $aj,$bi - ld $alo,8($ap) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - dmultu $lo0,$n0 - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 -.align 4 -.L1st: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - dmultu $nj,$m1 - daddu $hi1,AT - addu $j,8 - sd $lo1,($tp) - sltu s7,$j,$num - mflo $nlo - mfhi $nhi - - bnez s7,.L1st - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - - daddu $lo1,$nlo,$hi1 - sltu s7,$lo1,$hi1 - daddu $hi1,$nhi,s7 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - - sd $lo1,($tp) - - daddu $hi1,$hi0 - sltu AT,$hi1,$hi0 - sd $hi1,8($tp) - sd AT,16($tp) - - li $i,8 -.align 4 -.Louter: - PTR_ADD $bi,$bp,$i - ld $bi,($bi) - ld $aj,($ap) - ld $alo,8($ap) - ld $tj,(sp) - - dmultu $aj,$bi - ld $nj,($np) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - daddu $lo0,$tj - dmultu $lo0,$n0 - sltu AT,$lo0,$tj - daddu $hi0,AT - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 - ld $tj,8($tp) -.align 4 -.Linner: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo0,$tj - addu $j,8 - dmultu $nj,$m1 - sltu AT,$lo0,$tj - daddu $lo1,$lo0 - daddu $hi0,AT - sltu s7,$lo1,$lo0 - ld $tj,16($tp) - daddu $hi1,s7 - sltu AT,$j,$num - mflo $nlo - mfhi $nhi - sd $lo1,($tp) - bnez AT,.Linner - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - daddu $lo0,$tj - sltu s7,$lo0,$tj - daddu $hi0,s7 - - ld $tj,16($tp) - daddu $lo1,$nlo,$hi1 - sltu AT,$lo1,$hi1 - daddu $hi1,$nhi,AT - daddu $lo1,$lo0 - sltu s7,$lo1,$lo0 - daddu $hi1,s7 - sd $lo1,($tp) - - daddu $lo1,$hi1,$hi0 - sltu $hi1,$lo1,$hi0 - daddu $lo1,$tj - sltu AT,$lo1,$tj - daddu $hi1,AT - sd $lo1,8($tp) - sd $hi1,16($tp) - - addu $i,8 - sltu s7,$i,$num - bnez s7,.Louter - - .set noreorder - PTR_ADD $tj,sp,$num # &tp[num] - move $tp,sp - move $ap,sp - li $hi0,0 # clear borrow bit - -.align 4 -.Lsub: ld $lo0,($tp) - ld $lo1,($np) - PTR_ADD $tp,8 - PTR_ADD $np,8 - dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] - sgtu AT,$lo1,$lo0 - dsubu $lo0,$lo1,$hi0 - sgtu $hi0,$lo0,$lo1 - sd $lo0,($rp) - or $hi0,AT - sltu AT,$tp,$tj - bnez AT,.Lsub - PTR_ADD $rp,8 - - dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit - move $tp,sp - PTR_SUB $rp,$num # restore rp - not $hi1,$hi0 - - and $ap,$hi0,sp - and $bp,$hi1,$rp - or $ap,$ap,$bp # ap=borrow?tp:rp - -.align 4 -.Lcopy: ld $aj,($ap) - PTR_ADD $ap,8 - PTR_ADD $tp,8 - sd zero,-8($tp) - sltu AT,$tp,$tj - sd $aj,($rp) - bnez AT,.Lcopy - PTR_ADD $rp,8 - - ld s0,0($fp) - ld s1,8($fp) - ld s2,16($fp) - ld s3,24($fp) - ld s4,32($fp) - ld s5,40($fp) - ld s6,48($fp) - ld s7,56($fp) - li v0,1 - jr ra - PTR_ADD sp,$fp,64 - .set reorder -END(bn_mul_mont) -.rdata -.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>" -___ - -print $code; -close STDOUT; diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s deleted file mode 100644 index dca4105c7db1..000000000000 --- a/crypto/bn/asm/mips3.s +++ /dev/null @@ -1,2201 +0,0 @@ -.rdata -.asciiz "mips3.s, Version 1.1" -.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" - -/* - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to the OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * The module is designed to work with either of the "new" MIPS ABI(5), - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under - * IRIX 5.x not only because it doesn't support new ABIs but also - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only - * cause illegal instruction exception:-( - * - * In addition the code depends on preprocessor flags set up by MIPSpro - * compiler driver (either as or cc) and therefore (probably?) can't be - * compiled by the GNU assembler. GNU C driver manages fine though... - * I mean as long as -mmips-as is specified or is the default option, - * because then it simply invokes /usr/bin/as which in turn takes - * perfect care of the preprocessor definitions. Another neat feature - * offered by the MIPSpro assembler is an optimization pass. This gave - * me the opportunity to have the code looking more regular as all those - * architecture dependent instruction rescheduling details were left to - * the assembler. Cool, huh? - * - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' - * goes way over 3 times faster! - * - * <appro@fy.chalmers.se> - */ -#include <asm.h> -#include <regdef.h> - -#if _MIPS_ISA>=4 -#define MOVNZ(cond,dst,src) \ - movn dst,src,cond -#else -#define MOVNZ(cond,dst,src) \ - .set noreorder; \ - bnezl cond,.+8; \ - move dst,src; \ - .set reorder -#endif - -.text - -.set noat -.set reorder - -#define MINUS4 v1 - -.align 5 -LEAF(bn_mul_add_words) - .set noreorder - bgtzl a2,.L_bn_mul_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_add_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - dmultu t0,a3 - ld t1,0(a0) - ld t2,8(a1) - ld t3,8(a0) - ld ta0,16(a1) - ld ta1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 /* All manuals say it "compares 32-bit - * values", but it seems to work fine - * even on 64-bit registers. */ - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - - dmultu t2,a3 - ld ta2,24(a1) - ld ta3,24(a0) - daddu t3,v0 - sltu v0,t3,v0 - mflo AT - mfhi t2 - daddu t3,AT - daddu v0,t2 - sltu AT,t3,AT - sd t3,8(a0) - daddu v0,AT - - dmultu ta0,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - daddu ta1,v0 - sltu v0,ta1,v0 - mflo AT - mfhi ta0 - daddu ta1,AT - daddu v0,ta0 - sltu AT,ta1,AT - sd ta1,-16(a0) - daddu v0,AT - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - daddu ta3,v0 - sltu v0,ta3,v0 - mflo AT - mfhi ta2 - daddu ta3,AT - daddu v0,ta2 - sltu AT,ta3,AT - sd ta3,-8(a0) - daddu v0,AT - .set noreorder - bgtzl ta0,.L_bn_mul_add_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_add_words_return: - jr ra - -.L_bn_mul_add_words_tail: - dmultu t0,a3 - ld t1,0(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,8(a1) - dmultu t0,a3 - ld t1,8(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,8(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,16(a1) - dmultu t0,a3 - ld t1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,16(a0) - daddu v0,AT - jr ra -END(bn_mul_add_words) - -.align 5 -LEAF(bn_mul_words) - .set noreorder - bgtzl a2,.L_bn_mul_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - dmultu t0,a3 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - - dmultu t2,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - mflo AT - mfhi t2 - daddu v0,AT - sltu t3,v0,AT - sd v0,-24(a0) - daddu v0,t3,t2 - - dmultu ta0,a3 - mflo AT - mfhi ta0 - daddu v0,AT - sltu ta1,v0,AT - sd v0,-16(a0) - daddu v0,ta1,ta0 - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - mflo AT - mfhi ta2 - daddu v0,AT - sltu ta3,v0,AT - sd v0,-8(a0) - daddu v0,ta3,ta2 - .set noreorder - bgtzl ta0,.L_bn_mul_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_words_return: - jr ra - -.L_bn_mul_words_tail: - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,8(a1) - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,8(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,16(a1) - dmultu t0,a3 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,16(a0) - daddu v0,t1,t0 - jr ra -END(bn_mul_words) - -.align 5 -LEAF(bn_sqr_words) - .set noreorder - bgtzl a2,.L_bn_sqr_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sqr_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - dmultu t0,t0 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - - dmultu t2,t2 - subu a2,4 - PTR_ADD a0,64 - PTR_ADD a1,32 - mflo t3 - mfhi t2 - sd t3,-48(a0) - sd t2,-40(a0) - - dmultu ta0,ta0 - mflo ta1 - mfhi ta0 - sd ta1,-32(a0) - sd ta0,-24(a0) - - - dmultu ta2,ta2 - and ta0,a2,MINUS4 - mflo ta3 - mfhi ta2 - sd ta3,-16(a0) - sd ta2,-8(a0) - - .set noreorder - bgtzl ta0,.L_bn_sqr_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_sqr_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sqr_words_return: - move v0,zero - jr ra - -.L_bn_sqr_words_tail: - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,8(a1) - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,16(a0) - sd t0,24(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,16(a1) - dmultu t0,t0 - mflo t1 - mfhi t0 - sd t1,32(a0) - sd t0,40(a0) - jr ra -END(bn_sqr_words) - -.align 5 -LEAF(bn_add_words) - .set noreorder - bgtzl a3,.L_bn_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_add_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_add_words_tail - -.L_bn_add_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - daddu ta0,t0 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,-32(a0) - daddu v0,t8 - - daddu ta1,t1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,-24(a0) - daddu v0,t9 - - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,-16(a0) - daddu v0,t8 - - daddu ta3,t3 - sltu t9,ta3,t3 - daddu t3,ta3,v0 - sltu v0,t3,ta3 - sd t3,-8(a0) - daddu v0,t9 - - .set noreorder - bgtzl AT,.L_bn_add_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_add_words_return: - jr ra - -.L_bn_add_words_tail: - ld ta0,0(a2) - daddu ta0,t0 - subu a3,1 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,0(a0) - daddu v0,t8 - beqz a3,.L_bn_add_words_return - - ld t1,8(a1) - ld ta1,8(a2) - daddu ta1,t1 - subu a3,1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,8(a0) - daddu v0,t9 - beqz a3,.L_bn_add_words_return - - ld t2,16(a1) - ld ta2,16(a2) - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,16(a0) - daddu v0,t8 - jr ra -END(bn_add_words) - -.align 5 -LEAF(bn_sub_words) - .set noreorder - bgtzl a3,.L_bn_sub_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sub_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - sd ta0,-32(a0) - MOVNZ (t0,v0,t8) - - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - sd ta1,-24(a0) - MOVNZ (t1,v0,t9) - - - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - sd ta2,-16(a0) - MOVNZ (t2,v0,t8) - - sltu t9,t3,ta3 - dsubu t3,ta3 - dsubu ta3,t3,v0 - sd ta3,-8(a0) - MOVNZ (t3,v0,t9) - - .set noreorder - bgtzl AT,.L_bn_sub_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_sub_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sub_words_return: - jr ra - -.L_bn_sub_words_tail: - ld ta0,0(a2) - subu a3,1 - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - MOVNZ (t0,v0,t8) - sd ta0,0(a0) - beqz a3,.L_bn_sub_words_return - - ld t1,8(a1) - subu a3,1 - ld ta1,8(a2) - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - MOVNZ (t1,v0,t9) - sd ta1,8(a0) - beqz a3,.L_bn_sub_words_return - - ld t2,16(a1) - ld ta2,16(a2) - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - MOVNZ (t2,v0,t8) - sd ta2,16(a0) - jr ra -END(bn_sub_words) - -#undef MINUS4 - -.align 5 -LEAF(bn_div_3_words) - .set reorder - move a3,a0 /* we know that bn_div_words doesn't - * touch a3, ta2, ta3 and preserves a2 - * so that we can save two arguments - * and return address in registers - * instead of stack:-) - */ - ld a0,(a3) - move ta2,a1 - ld a1,-8(a3) - bne a0,a2,.L_bn_div_3_words_proceed - li v0,-1 - jr ra -.L_bn_div_3_words_proceed: - move ta3,ra - bal bn_div_words - move ra,ta3 - dmultu ta2,v0 - ld t2,-16(a3) - move ta0,zero - mfhi t1 - mflo t0 - sltu t8,t1,v1 -.L_bn_div_3_words_inner_loop: - bnez t8,.L_bn_div_3_words_inner_loop_done - sgeu AT,t2,t0 - seq t9,t1,v1 - and AT,t9 - sltu t3,t0,ta2 - daddu v1,a2 - dsubu t1,t3 - dsubu t0,ta2 - sltu t8,t1,v1 - sltu ta0,v1,a2 - or t8,ta0 - .set noreorder - beqzl AT,.L_bn_div_3_words_inner_loop - dsubu v0,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - jr ra -END(bn_div_3_words) - -.align 5 -LEAF(bn_div_words) - .set noreorder - bnezl a2,.L_bn_div_words_proceed - move v1,zero - jr ra - li v0,-1 /* I'd rather signal div-by-zero - * which can be done with 'break 7' */ - -.L_bn_div_words_proceed: - bltz a2,.L_bn_div_words_body - move t9,v1 - dsll a2,1 - bgtz a2,.-4 - addu t9,1 - - .set reorder - negu t1,t9 - li t2,-1 - dsll t2,t1 - and t2,a0 - dsrl AT,a1,t1 - .set noreorder - bnezl t2,.+8 - break 6 /* signal overflow */ - .set reorder - dsll a0,t9 - dsll a1,t9 - or a0,AT - -#define QT ta0 -#define HH ta1 -#define DH v1 -.L_bn_div_words_body: - dsrl DH,a2,32 - sgeu AT,a0,a2 - .set noreorder - bnezl AT,.+8 - dsubu a0,a2 - .set reorder - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div1 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div1: - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop1: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v0,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop1_done - dsubu t1,v0 - dsubu t0,a2 - b .L_bn_div_words_inner_loop1 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - dsll a1,32 - dsubu a0,t3,t0 - dsll v0,QT,32 - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div2 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div2: -#undef DH - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop2: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v1,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop2_done - dsubu t1,v1 - dsubu t0,a2 - b .L_bn_div_words_inner_loop2 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop2_done: -#undef HH - - dsubu a0,t3,t0 - or v0,QT - dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ - dsrl a2,t9 /* restore a2 */ - jr ra -#undef QT -END(bn_div_words) - -#define a_0 t0 -#define a_1 t1 -#define a_2 t2 -#define a_3 t3 -#define b_0 ta0 -#define b_1 ta1 -#define b_2 ta2 -#define b_3 ta3 - -#define a_4 s0 -#define a_5 s2 -#define a_6 s4 -#define a_7 a1 /* once we load a[7] we don't need a anymore */ -#define b_4 s1 -#define b_5 s3 -#define b_6 s5 -#define b_7 a2 /* once we load b[7] we don't need b anymore */ - -#define t_1 t8 -#define t_2 t9 - -#define c_1 v0 -#define c_2 v1 -#define c_3 a3 - -#define FRAME_SIZE 48 - -.align 5 -LEAF(bn_mul_comba8) - .set noreorder - PTR_SUB sp,FRAME_SIZE - .frame sp,64,ra - .set reorder - ld a_0,0(a1) /* If compiled with -mips3 option on - * R5000 box assembler barks on this - * line with "shouldn't have mult/div - * as last instruction in bb (R10K - * bug)" warning. If anybody out there - * has a clue about how to circumvent - * this do send me a note. - * <appro@fy.chalmers.se> - */ - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - sd s0,0(sp) - sd s1,8(sp) - sd s2,16(sp) - sd s3,24(sp) - sd s4,32(sp) - sd s5,40(sp) - mflo c_1 - mfhi c_2 - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - ld b_4,32(a2) - ld b_5,40(a2) - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - ld b_6,48(a2) - ld b_7,56(a2) - sd c_1,0(a0) /* r[0]=c1; */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) /* r[1]=c2; */ - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) /* r[2]=c3; */ - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) /* r[3]=c1; */ - - dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) /* r[4]=c2; */ - - dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) /* r[5]=c3; */ - - dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) /* r[6]=c1; */ - - dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) /* r[7]=c2; */ - - dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) /* r[8]=c3; */ - - dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) /* r[9]=c1; */ - - dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) /* r[10]=c2; */ - - dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) /* r[11]=c3; */ - - dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) /* r[12]=c1; */ - - dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) /* r[13]=c2; */ - - dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - ld s0,0(sp) - ld s1,8(sp) - ld s2,16(sp) - ld s3,24(sp) - ld s4,32(sp) - ld s5,40(sp) - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) /* r[14]=c3; */ - sd c_1,120(a0) /* r[15]=c1; */ - - PTR_ADD sp,FRAME_SIZE - - jr ra -END(bn_mul_comba8) - -.align 5 -LEAF(bn_mul_comba4) - .set reorder - ld a_0,0(a1) - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_mul_comba4) - -#undef a_4 -#undef a_5 -#undef a_6 -#undef a_7 -#define a_4 b_0 -#define a_5 b_1 -#define a_6 b_2 -#define a_7 b_3 - -.align 5 -LEAF(bn_sqr_comba8) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) - - dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) - - dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) - - dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) - - dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) - - dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) - - dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) - - dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) - - dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) - sd c_1,120(a0) - - jr ra -END(bn_sqr_comba8) - -.align 5 -LEAF(bn_sqr_comba4) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_sqr_comba4) diff --git a/crypto/bn/asm/pa-risc2.s b/crypto/bn/asm/pa-risc2.s deleted file mode 100644 index f3b16290eb04..000000000000 --- a/crypto/bn/asm/pa-risc2.s +++ /dev/null @@ -1,1618 +0,0 @@ -; -; PA-RISC 2.0 implementation of bn_asm code, based on the -; 64-bit version of the code. This code is effectively the -; same as the 64-bit version except the register model is -; slightly different given all values must be 32-bit between -; function calls. Thus the 64-bit return values are returned -; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit -; -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0N - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 32-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28,r29 (ret0,ret1) -; Stack pointer ; r30 (sp) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -n .reg %r23 - -; -; Note that the "w" argument for bn_mul_add_words and bn_mul_words -; is passed on the stack at a delta of -56 from the top of stack -; as the routine is entered. -; - -; -; Globals used in some routines -; - -top_overflow .reg %r23 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg3 = num -; -56(sp) = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - NOP - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - - STD %r8,40(%sp) ; save r8 - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret1 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret1,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg3 = num -; w on stack at -56(sp) - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret1 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret1,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret1 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just output from the HP C compiler. -; -;------------------------------------------------------------------------------ -bn_div_words - .PROC - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN - .IMPORT BN_num_bits_word,CODE - ;--- not PIC .IMPORT __iob,DATA - ;--- not PIC .IMPORT fprintf,CODE - .IMPORT abort,CODE - .IMPORT $$div2U,MILLICODE - .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .ENTRY - STW %r2,-20(%r30) ;offset 0x8ec - STW,MA %r3,192(%r30) ;offset 0x8f0 - STW %r4,-188(%r30) ;offset 0x8f4 - DEPD %r5,31,32,%r6 ;offset 0x8f8 - STD %r6,-184(%r30) ;offset 0x8fc - DEPD %r7,31,32,%r8 ;offset 0x900 - STD %r8,-176(%r30) ;offset 0x904 - STW %r9,-168(%r30) ;offset 0x908 - LDD -248(%r30),%r3 ;offset 0x90c - COPY %r26,%r4 ;offset 0x910 - COPY %r24,%r5 ;offset 0x914 - DEPD %r25,31,32,%r4 ;offset 0x918 - CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c - DEPD %r23,31,32,%r5 ;offset 0x920 - MOVIB,TR -1,%r29,$00060002 ;offset 0x924 - EXTRD,U %r29,31,32,%r28 ;offset 0x928 -$0006002A - LDO -1(%r29),%r29 ;offset 0x92c - SUB %r23,%r7,%r23 ;offset 0x930 -$00060024 - SUB %r4,%r31,%r25 ;offset 0x934 - AND %r25,%r19,%r26 ;offset 0x938 - CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c - DEPD,Z %r25,31,32,%r20 ;offset 0x940 - OR %r20,%r24,%r21 ;offset 0x944 - CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 - SUB %r31,%r2,%r31 ;offset 0x94c -$00060046 -$0006002E - DEPD,Z %r23,31,32,%r25 ;offset 0x950 - EXTRD,U %r23,31,32,%r26 ;offset 0x954 - AND %r25,%r19,%r24 ;offset 0x958 - ADD,L %r31,%r26,%r31 ;offset 0x95c - CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 - LDO 1(%r31),%r31 ;offset 0x964 -$00060032 - CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 - LDO -1(%r29),%r29 ;offset 0x96c - ADD,L %r4,%r3,%r4 ;offset 0x970 -$00060036 - ADDIB,=,N -1,%r8,$D0 ;offset 0x974 - SUB %r5,%r24,%r28 ;offset 0x978 -$0006003A - SUB %r4,%r31,%r24 ;offset 0x97c - SHRPD %r24,%r28,32,%r4 ;offset 0x980 - DEPD,Z %r29,31,32,%r9 ;offset 0x984 - DEPD,Z %r28,31,32,%r5 ;offset 0x988 -$0006001C - EXTRD,U %r4,31,32,%r31 ;offset 0x98c - CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 - MOVB,TR %r6,%r29,$D1 ;offset 0x994 - STD %r29,-152(%r30) ;offset 0x998 -$0006000C - EXTRD,U %r3,31,32,%r25 ;offset 0x99c - COPY %r3,%r26 ;offset 0x9a0 - EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 - EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 - .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; - B,L BN_num_bits_word,%r2 ;offset 0x9ac - EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 - LDI 64,%r20 ;offset 0x9b4 - DEPD %r7,31,32,%r5 ;offset 0x9b8 - DEPD %r8,31,32,%r4 ;offset 0x9bc - DEPD %r9,31,32,%r3 ;offset 0x9c0 - CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 - COPY %r28,%r24 ;offset 0x9c8 - MTSARCM %r24 ;offset 0x9cc - DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 - CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 -$00060012 - SUBI 64,%r24,%r31 ;offset 0x9d8 - CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc - SUB %r4,%r3,%r4 ;offset 0x9e0 -$00060016 - CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 - COPY %r0,%r9 ;offset 0x9e8 - MTSARCM %r31 ;offset 0x9ec - DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 - SUBI 64,%r31,%r26 ;offset 0x9f4 - MTSAR %r26 ;offset 0x9f8 - SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc - MTSARCM %r31 ;offset 0xa00 - DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 -$0006001A - DEPDI,Z -1,31,32,%r19 ;offset 0xa08 - AND %r3,%r19,%r29 ;offset 0xa0c - EXTRD,U %r29,31,32,%r2 ;offset 0xa10 - DEPDI,Z -1,63,32,%r6 ;offset 0xa14 - MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 - EXTRD,U %r3,63,32,%r7 ;offset 0xa1c -$D2 - ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 - ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 - ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 - ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; - ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c - ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 - .CALL ; - B,L abort,%r2 ;offset 0xa34 - NOP ;offset 0xa38 - B $D3 ;offset 0xa3c - LDW -212(%r30),%r2 ;offset 0xa40 -$00060020 - COPY %r4,%r26 ;offset 0xa44 - EXTRD,U %r4,31,32,%r25 ;offset 0xa48 - COPY %r2,%r24 ;offset 0xa4c - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r31 ;offset 0xa50 - EXTRD,U %r2,31,32,%r23 ;offset 0xa54 - DEPD %r28,31,32,%r29 ;offset 0xa58 -$00060022 - STD %r29,-152(%r30) ;offset 0xa5c -$D1 - AND %r5,%r19,%r24 ;offset 0xa60 - EXTRD,U %r24,31,32,%r24 ;offset 0xa64 - STW %r2,-160(%r30) ;offset 0xa68 - STW %r7,-128(%r30) ;offset 0xa6c - FLDD -152(%r30),%fr4 ;offset 0xa70 - FLDD -152(%r30),%fr7 ;offset 0xa74 - FLDW -160(%r30),%fr8L ;offset 0xa78 - FLDW -128(%r30),%fr5L ;offset 0xa7c - XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 - FSTD %fr10,-136(%r30) ;offset 0xa84 - XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 - FSTD %fr22,-144(%r30) ;offset 0xa8c - XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 - XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 - FSTD %fr11,-112(%r30) ;offset 0xa98 - FSTD %fr23,-120(%r30) ;offset 0xa9c - LDD -136(%r30),%r28 ;offset 0xaa0 - DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 - LDD -144(%r30),%r20 ;offset 0xaa8 - ADD,L %r20,%r31,%r31 ;offset 0xaac - LDD -112(%r30),%r22 ;offset 0xab0 - DEPD,Z %r22,31,32,%r22 ;offset 0xab4 - LDD -120(%r30),%r21 ;offset 0xab8 - B $00060024 ;offset 0xabc - ADD,L %r21,%r22,%r23 ;offset 0xac0 -$D0 - OR %r9,%r29,%r29 ;offset 0xac4 -$00060040 - EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -$00060002 -$L2 - LDW -212(%r30),%r2 ;offset 0xacc -$D3 - LDW -168(%r30),%r9 ;offset 0xad0 - LDD -176(%r30),%r8 ;offset 0xad4 - EXTRD,U %r8,31,32,%r7 ;offset 0xad8 - LDD -184(%r30),%r6 ;offset 0xadc - EXTRD,U %r6,31,32,%r5 ;offset 0xae0 - LDW -188(%r30),%r4 ;offset 0xae4 - BVE (%r2) ;offset 0xae8 - .EXIT - LDW,MB -192(%r30),%r3 ;offset 0xaec - .PROCEND ;in=23,25;out=28,29;fpin=105,107; - - - - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SPACE $PRIVATE$,SORT=16 -;--- not PIC .IMPORT $global$,DATA -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c -;--- not PIC C$7 -;--- not PIC .ALIGN 8 -;--- not PIC .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/crypto/bn/asm/pa-risc2W.s b/crypto/bn/asm/pa-risc2W.s deleted file mode 100644 index a99545754d18..000000000000 --- a/crypto/bn/asm/pa-risc2W.s +++ /dev/null @@ -1,1605 +0,0 @@ -; -; PA-RISC 64-bit implementation of bn_asm code -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0W - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 64-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28 (ret0,ret1) -; Stack pointer ; r30 (sp) -; global data pointer ; r27 (dp) -; argument pointer ; r29 (ap) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -w .reg %r23 -n .reg %r23 - - -; -; Globals used in some routines -; - -top_overflow .reg %r29 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = num -; arg3 = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; Needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - STD %r8,40(%sp) ; save r8 - - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; store w on stack - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret0 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret0,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret0 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; arg3 = w - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; w on stack - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret0 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret0,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret0 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just modified assembly from the compiler, thus the -; lack of variable names. -; -;------------------------------------------------------------------------------ -bn_div_words - .proc - .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .IMPORT BN_num_bits_word,CODE,NO_RELOCATION - .IMPORT __iob,DATA - .IMPORT fprintf,CODE,NO_RELOCATION - .IMPORT abort,CODE,NO_RELOCATION - .IMPORT $$div2U,MILLICODE - .entry - STD %r2,-16(%r30) - STD,MA %r3,352(%r30) - STD %r4,-344(%r30) - STD %r5,-336(%r30) - STD %r6,-328(%r30) - STD %r7,-320(%r30) - STD %r8,-312(%r30) - STD %r9,-304(%r30) - STD %r10,-296(%r30) - - STD %r27,-288(%r30) ; save gp - - COPY %r24,%r3 ; save d - COPY %r26,%r4 ; save h (high 64-bits) - LDO -1(%r0),%ret0 ; return -1 by default - - CMPB,*= %r0,%arg2,$D3 ; if (d == 0) - COPY %r25,%r5 ; save l (low 64-bits) - - LDO -48(%r30),%r29 ; create ap - .CALL ;in=26,29;out=28; - B,L BN_num_bits_word,%r2 - COPY %r3,%r26 - LDD -288(%r30),%r27 ; restore gp - LDI 64,%r21 - - CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) - COPY %ret0,%r24 ; i - MTSARCM %r24 - DEPDI,Z -1,%sar,1,%r29 - CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward) - -$00000012 - SUBI 64,%r24,%r31 ; i = 64 - i; - CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d) - SUB %r4,%r3,%r4 ; h -= d - CMPB,= %r31,%r0,$0000001A ; if (i) - COPY %r0,%r10 ; ret = 0 - MTSARCM %r31 ; i to shift - DEPD,Z %r3,%sar,64,%r3 ; d <<= i; - SUBI 64,%r31,%r19 ; 64 - i; redundent - MTSAR %r19 ; (64 -i) to shift - SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) - MTSARCM %r31 ; i to shift - DEPD,Z %r5,%sar,64,%r5 ; l <<= i; - -$0000001A - DEPDI,Z -1,31,32,%r19 - EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 - EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff - LDO 2(%r0),%r9 - STD %r3,-280(%r30) ; "d" to stack - -$0000001C - DEPDI,Z -1,63,32,%r29 ; - EXTRD,U %r4,31,32,%r31 ; h >> 32 - CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div - COPY %r4,%r26 - EXTRD,U %r4,31,32,%r25 - COPY %r6,%r24 - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r2 - EXTRD,U %r6,31,32,%r23 - DEPD %r28,31,32,%r29 -$D2 - STD %r29,-272(%r30) ; q - AND %r5,%r19,%r24 ; t & 0xffffffff00000000; - EXTRD,U %r24,31,32,%r24 ; ??? - FLDD -272(%r30),%fr7 ; q - FLDD -280(%r30),%fr8 ; d - XMPYU %fr8L,%fr7L,%fr10 - FSTD %fr10,-256(%r30) - XMPYU %fr8L,%fr7R,%fr22 - FSTD %fr22,-264(%r30) - XMPYU %fr8R,%fr7L,%fr11 - XMPYU %fr8R,%fr7R,%fr23 - FSTD %fr11,-232(%r30) - FSTD %fr23,-240(%r30) - LDD -256(%r30),%r28 - DEPD,Z %r28,31,32,%r2 - LDD -264(%r30),%r20 - ADD,L %r20,%r2,%r31 - LDD -232(%r30),%r22 - DEPD,Z %r22,31,32,%r22 - LDD -240(%r30),%r21 - B $00000024 ; enter loop - ADD,L %r21,%r22,%r23 - -$0000002A - LDO -1(%r29),%r29 - SUB %r23,%r8,%r23 -$00000024 - SUB %r4,%r31,%r25 - AND %r25,%r19,%r26 - CMPB,*<>,N %r0,%r26,$00000046 ; (forward) - DEPD,Z %r25,31,32,%r20 - OR %r20,%r24,%r21 - CMPB,*<<,N %r21,%r23,$0000002A ;(backward) - SUB %r31,%r6,%r31 -;-------------Break path--------------------- - -$00000046 - DEPD,Z %r23,31,32,%r25 ;tl - EXTRD,U %r23,31,32,%r26 ;t - AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L - ADD,L %r31,%r26,%r31 ;th += t; - CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl) - LDO 1(%r31),%r31 ; th++; - CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward) - LDO -1(%r29),%r29 ;q--; - ADD,L %r4,%r3,%r4 ;h += d; -$00000036 - ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward) - SUB %r5,%r24,%r28 ; l -= tl; - SUB %r4,%r31,%r24 ; h -= th; - SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32)); - DEPD,Z %r29,31,32,%r10 ; ret = q<<32 - b $0000001C - DEPD,Z %r28,31,32,%r5 ; l = l << 32 - -$D1 - OR %r10,%r29,%r28 ; ret |= q -$D3 - LDD -368(%r30),%r2 -$D0 - LDD -296(%r30),%r10 - LDD -304(%r30),%r9 - LDD -312(%r30),%r8 - LDD -320(%r30),%r7 - LDD -328(%r30),%r6 - LDD -336(%r30),%r5 - LDD -344(%r30),%r4 - BVE (%r2) - .EXIT - LDD,MB -352(%r30),%r3 - -bn_div_err_case - MFIA %r6 - ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 - LDO R'bn_div_words-bn_div_err_case(%r1),%r6 - ADDIL LT'__iob,%r27,%r1 - LDD RT'__iob(%r1),%r26 - ADDIL L'C$4-bn_div_words,%r6,%r1 - LDO R'C$4-bn_div_words(%r1),%r25 - LDO 64(%r26),%r26 - .CALL ;in=24,25,26,29;out=28; - B,L fprintf,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - .CALL ;in=29; - B,L abort,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - B $D0 - LDD -368(%r30),%r2 - .PROCEND ;in=24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SPACE $PRIVATE$,SORT=16 - .IMPORT $global$,DATA - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SUBSPA $LIT$,ACCESS=0x2c -C$4 - .ALIGN 8 - .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/crypto/bn/asm/parisc-mont.pl b/crypto/bn/asm/parisc-mont.pl index c02ef6f01466..aa9f626ed267 100755 --- a/crypto/bn/asm/parisc-mont.pl +++ b/crypto/bn/asm/parisc-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -14,7 +21,7 @@ # optimal in respect to instruction set capabilities. Fair comparison # with vendor compiler is problematic, because OpenSSL doesn't define # BN_LLONG [presumably] for historical reasons, which drives compiler -# toward 4 times 16x16=32-bit multiplicatons [plus complementary +# toward 4 times 16x16=32-bit multiplications [plus complementary # shifts and additions] instead. This means that you should observe # several times improvement over code generated by vendor compiler # for PA-RISC 1.1, but the "baseline" is far from optimal. The actual @@ -126,7 +133,7 @@ $fp="%r3"; $hi1="%r2"; $hi0="%r1"; -$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s +$xfer=$n0; # accommodates [-16..15] offset in fld[dw]s $fm0="%fr4"; $fti=$fm0; $fbi="%fr5L"; @@ -510,7 +517,6 @@ L\$sub stws,ma $hi1,4($rp) subb $ti0,%r0,$hi1 - ldo -4($tp),$tp ___ $code.=<<___ if ($BN_SZ==8); ldd,ma 8($tp),$ti0 @@ -525,21 +531,19 @@ L\$sub extrd,u $ti0,31,32,$ti0 ; carry in flipped word order sub,db $ti0,%r0,$hi1 - ldo -8($tp),$tp ___ $code.=<<___; - and $tp,$hi1,$ap - andcm $rp,$hi1,$bp - or $ap,$bp,$np - + ldo `$LOCALS+32`($fp),$tp sub $rp,$arrsz,$rp ; rewind rp subi 0,$arrsz,$idx - ldo `$LOCALS+32`($fp),$tp L\$copy - ldd $idx($np),$hi0 + ldd 0($tp),$ti0 + ldd 0($rp),$hi0 std,ma %r0,8($tp) - addib,<> 8,$idx,.-8 ; L\$copy - std,ma $hi0,8($rp) + comiclr,= 0,$hi1,%r0 + copy $ti0,$hi0 + addib,<> 8,$idx,L\$copy + std,ma $hi0,8($rp) ___ if ($BN_SZ==4) { # PA-RISC 1.1 code-path @@ -849,19 +853,18 @@ L\$sub_pa11 stws,ma $hi1,4($rp) subb $ti0,%r0,$hi1 - ldo -4($tp),$tp - and $tp,$hi1,$ap - andcm $rp,$hi1,$bp - or $ap,$bp,$np + ldo `$LOCALS+32`($fp),$tp sub $rp,$arrsz,$rp ; rewind rp subi 0,$arrsz,$idx - ldo `$LOCALS+32`($fp),$tp L\$copy_pa11 - ldwx $idx($np),$hi0 + ldw 0($tp),$ti0 + ldw 0($rp),$hi0 stws,ma %r0,4($tp) + comiclr,= 0,$hi1,%r0 + copy $ti0,$hi0 addib,<> 4,$idx,L\$copy_pa11 - stws,ma $hi0,4($rp) + stws,ma $hi0,4($rp) nop ; alignment L\$done @@ -981,6 +984,11 @@ sub assemble { ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; } +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler/) { + $gnuas = 1; +} + foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/ge; # flip word order in 64-bit mode... @@ -988,7 +996,10 @@ foreach (split("\n",$code)) { # assemble 2.0 instructions in 32-bit mode... s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); - s/\bbv\b/bve/gm if ($SIZE_T==8); + s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); + s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); + s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); + s/\bbv\b/bve/ if ($SIZE_T==8); print $_,"\n"; } diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl index 6930a3acebd2..ec7e019a4380 100755 --- a/crypto/bn/asm/ppc-mont.pl +++ b/crypto/bn/asm/ppc-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -19,11 +26,21 @@ # So far RSA *sign* performance improvement over pre-bn_mul_mont asm # for 64-bit application running on PPC970/G5 is: # -# 512-bit +65% +# 512-bit +65% # 1024-bit +35% # 2048-bit +18% # 4096-bit +4% +# September 2016 +# +# Add multiplication procedure operating on lengths divisible by 4 +# and squaring procedure operating on lengths divisible by 8. Length +# is expressed in number of limbs. RSA private key operations are +# ~35-50% faster (more for longer keys) on contemporary high-end POWER +# processors in 64-bit builds, [mysteriously enough] more in 32-bit +# builds. On low-end 32-bit processors performance improvement turned +# to be marginal... + $flavour = shift; if ($flavour =~ /32/) { @@ -42,7 +59,8 @@ if ($flavour =~ /32/) { $UMULL= "mullw"; # unsigned multiply low $UMULH= "mulhwu"; # unsigned multiply high $UCMP= "cmplw"; # unsigned compare - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate + $SHLI= "slwi"; # unsigned shift left by immediate $PUSH= $ST; $POP= $LD; } elsif ($flavour =~ /64/) { @@ -62,7 +80,8 @@ if ($flavour =~ /32/) { $UMULL= "mulld"; # unsigned multiply low $UMULH= "mulhdu"; # unsigned multiply high $UCMP= "cmpld"; # unsigned compare - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate + $SHLI= "sldi"; # unsigned shift left by immediate $PUSH= $ST; $POP= $LD; } else { die "nonsense $flavour"; } @@ -79,43 +98,44 @@ open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; $sp="r1"; $toc="r2"; -$rp="r3"; $ovf="r3"; +$rp="r3"; $ap="r4"; $bp="r5"; $np="r6"; $n0="r7"; $num="r8"; -$rp="r9"; # $rp is reassigned -$aj="r10"; -$nj="r11"; -$tj="r12"; + +{ +my $ovf=$rp; +my $rp="r9"; # $rp is reassigned +my $aj="r10"; +my $nj="r11"; +my $tj="r12"; # non-volatile registers -$i="r20"; -$j="r21"; -$tp="r22"; -$m0="r23"; -$m1="r24"; -$lo0="r25"; -$hi0="r26"; -$lo1="r27"; -$hi1="r28"; -$alo="r29"; -$ahi="r30"; -$nlo="r31"; +my $i="r20"; +my $j="r21"; +my $tp="r22"; +my $m0="r23"; +my $m1="r24"; +my $lo0="r25"; +my $hi0="r26"; +my $lo1="r27"; +my $hi1="r28"; +my $alo="r29"; +my $ahi="r30"; +my $nlo="r31"; # -$nhi="r0"; +my $nhi="r0"; $code=<<___; .machine "any" .text .globl .bn_mul_mont_int -.align 4 +.align 5 .bn_mul_mont_int: - cmpwi $num,4 mr $rp,r3 ; $rp is reassigned li r3,0 - bltlr ___ $code.=<<___ if ($BNSZ==4); cmpwi $num,32 ; longer key performance is not better @@ -294,15 +314,16 @@ Lsub: $LDX $tj,$tp,$j li $j,0 mtctr $num subfe $ovf,$j,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp .align 4 -Lcopy: ; copy or in-place refresh - $LDX $tj,$ap,$j - $STX $tj,$rp,$j +Lcopy: ; conditional copy + $LDX $tj,$tp,$j + $LDX $aj,$rp,$j + and $tj,$tj,$ovf + andc $aj,$aj,$ovf $STX $j,$tp,$j ; zap at once + or $aj,$aj,$tj + $STX $aj,$rp,$j addi $j,$j,$BNSZ bdnz Lcopy @@ -326,7 +347,1641 @@ Lcopy: ; copy or in-place refresh .byte 0,12,4,0,0x80,12,6,0 .long 0 .size .bn_mul_mont_int,.-.bn_mul_mont_int +___ +} +if (1) { +my ($a0,$a1,$a2,$a3, + $t0,$t1,$t2,$t3, + $m0,$m1,$m2,$m3, + $acc0,$acc1,$acc2,$acc3,$acc4, + $bi,$mi,$tp,$ap_end,$cnt) = map("r$_",(9..12,14..31)); +my ($carry,$zero) = ($rp,"r0"); + +# sp----------->+-------------------------------+ +# | saved sp | +# +-------------------------------+ +# . . +# +8*size_t +-------------------------------+ +# | 4 "n0*t0" | +# . . +# . . +# +12*size_t +-------------------------------+ +# | size_t tmp[num] | +# . . +# . . +# . . +# +-------------------------------+ +# | topmost carry | +# . . +# -18*size_t +-------------------------------+ +# | 18 saved gpr, r14-r31 | +# . . +# . . +# +-------------------------------+ +$code.=<<___; +.globl .bn_mul4x_mont_int +.align 5 +.bn_mul4x_mont_int: + andi. r0,$num,7 + bne .Lmul4x_do + $UCMP $ap,$bp + bne .Lmul4x_do + b .Lsqr8x_do +.Lmul4x_do: + slwi $num,$num,`log($SIZE_T)/log(2)` + mr $a0,$sp + li $a1,-32*$SIZE_T + sub $a1,$a1,$num + $STUX $sp,$sp,$a1 # alloca + + $PUSH r14,-$SIZE_T*18($a0) + $PUSH r15,-$SIZE_T*17($a0) + $PUSH r16,-$SIZE_T*16($a0) + $PUSH r17,-$SIZE_T*15($a0) + $PUSH r18,-$SIZE_T*14($a0) + $PUSH r19,-$SIZE_T*13($a0) + $PUSH r20,-$SIZE_T*12($a0) + $PUSH r21,-$SIZE_T*11($a0) + $PUSH r22,-$SIZE_T*10($a0) + $PUSH r23,-$SIZE_T*9($a0) + $PUSH r24,-$SIZE_T*8($a0) + $PUSH r25,-$SIZE_T*7($a0) + $PUSH r26,-$SIZE_T*6($a0) + $PUSH r27,-$SIZE_T*5($a0) + $PUSH r28,-$SIZE_T*4($a0) + $PUSH r29,-$SIZE_T*3($a0) + $PUSH r30,-$SIZE_T*2($a0) + $PUSH r31,-$SIZE_T*1($a0) + + subi $ap,$ap,$SIZE_T # bias by -1 + subi $np,$np,$SIZE_T # bias by -1 + subi $rp,$rp,$SIZE_T # bias by -1 + $LD $n0,0($n0) # *n0 + + add $t0,$bp,$num + add $ap_end,$ap,$num + subi $t0,$t0,$SIZE_T*4 # &b[num-4] + + $LD $bi,$SIZE_T*0($bp) # b[0] + li $acc0,0 + $LD $a0,$SIZE_T*1($ap) # a[0..3] + li $acc1,0 + $LD $a1,$SIZE_T*2($ap) + li $acc2,0 + $LD $a2,$SIZE_T*3($ap) + li $acc3,0 + $LDU $a3,$SIZE_T*4($ap) + $LD $m0,$SIZE_T*1($np) # n[0..3] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + + $PUSH $rp,$SIZE_T*6($sp) # offload rp and &b[num-4] + $PUSH $t0,$SIZE_T*7($sp) + li $carry,0 + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + li $cnt,0 + li $zero,0 + b .Loop_mul4x_1st_reduction + +.align 5 +.Loop_mul4x_1st_reduction: + $UMULL $t0,$a0,$bi # lo(a[0..3]*b[0]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[0..3]*b[0]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULL $mi,$acc0,$n0 # t[0]*n0 + adde $acc3,$acc3,$t3 + $UMULH $t2,$a2,$bi + addze $acc4,$zero + $UMULH $t3,$a3,$bi + $LDX $bi,$bp,$cnt # next b[i] (or b[0]) + addc $acc1,$acc1,$t0 + # (*) mul $t0,$m0,$mi # lo(n[0..3]*t[0]*n0) + $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + # (*) addc $acc0,$acc0,$t0 + # (*) As for removal of first multiplication and addition + # instructions. The outcome of first addition is + # guaranteed to be zero, which leaves two computationally + # significant outcomes: it either carries or not. Then + # question is when does it carry? Is there alternative + # way to deduce it? If you follow operations, you can + # observe that condition for carry is quite simple: + # $acc0 being non-zero. So that carry can be calculated + # by adding -1 to $acc0. That's what next instruction does. + addic $acc0,$acc0,-1 # (*), discarded + $UMULH $t0,$m0,$mi # hi(n[0..3]*t[0]*n0) + adde $acc0,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc1,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc2,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc3,$acc4,$carry + addze $carry,$zero + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + bne .Loop_mul4x_1st_reduction + + $UCMP $ap_end,$ap + beq .Lmul4x4_post_condition + + $LD $a0,$SIZE_T*1($ap) # a[4..7] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + $LD $mi,$SIZE_T*8($sp) # a[0]*n0 + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_1st_tail + +.align 5 +.Loop_mul4x_1st_tail: + $UMULL $t0,$a0,$bi # lo(a[4..7]*b[i]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[4..7]*b[i]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULH $t2,$a2,$bi + adde $acc3,$acc3,$t3 + $UMULH $t3,$a3,$bi + addze $acc4,$zero + $LDX $bi,$bp,$cnt # next b[i] (or b[0]) + addc $acc1,$acc1,$t0 + $UMULL $t0,$m0,$mi # lo(n[4..7]*a[0]*n0) + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + addc $acc0,$acc0,$t0 + $UMULH $t0,$m0,$mi # hi(n[4..7]*a[0]*n0) + adde $acc1,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc2,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc3,$acc3,$t3 + adde $acc4,$acc4,$carry + $UMULH $t3,$m3,$mi + addze $carry,$zero + addi $mi,$sp,$SIZE_T*8 + $LDX $mi,$mi,$cnt # next t[0]*n0 + $STU $acc0,$SIZE_T($tp) # word of result + addc $acc0,$acc1,$t0 + adde $acc1,$acc2,$t1 + adde $acc2,$acc3,$t2 + adde $acc3,$acc4,$t3 + #addze $carry,$carry + bne .Loop_mul4x_1st_tail + + sub $t1,$ap_end,$num # rewinded $ap + $UCMP $ap_end,$ap # done yet? + beq .Lmul4x_proceed + + $LD $a0,$SIZE_T*1($ap) + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + $LD $m0,$SIZE_T*1($np) + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + $LDU $bi,$SIZE_T*4($bp) # *++b + addze $carry,$carry # topmost carry + $LD $a0,$SIZE_T*1($t1) + $LD $a1,$SIZE_T*2($t1) + $LD $a2,$SIZE_T*3($t1) + $LD $a3,$SIZE_T*4($t1) + addi $ap,$t1,$SIZE_T*4 + sub $np,$np,$num # rewind np + + $ST $acc0,$SIZE_T*1($tp) # result + $ST $acc1,$SIZE_T*2($tp) + $ST $acc2,$SIZE_T*3($tp) + $ST $acc3,$SIZE_T*4($tp) + $ST $carry,$SIZE_T*5($tp) # save topmost carry + $LD $acc0,$SIZE_T*12($sp) # t[0..3] + $LD $acc1,$SIZE_T*13($sp) + $LD $acc2,$SIZE_T*14($sp) + $LD $acc3,$SIZE_T*15($sp) + + $LD $m0,$SIZE_T*1($np) # n[0..3] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + li $carry,0 + b .Loop_mul4x_reduction + +.align 5 +.Loop_mul4x_reduction: + $UMULL $t0,$a0,$bi # lo(a[0..3]*b[4]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[0..3]*b[4]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULL $mi,$acc0,$n0 # t[0]*n0 + adde $acc3,$acc3,$t3 + $UMULH $t2,$a2,$bi + addze $acc4,$zero + $UMULH $t3,$a3,$bi + $LDX $bi,$bp,$cnt # next b[i] + addc $acc1,$acc1,$t0 + # (*) mul $t0,$m0,$mi + $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi # lo(n[0..3]*t[0]*n0 + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + # (*) addc $acc0,$acc0,$t0 + addic $acc0,$acc0,-1 # (*), discarded + $UMULH $t0,$m0,$mi # hi(n[0..3]*t[0]*n0 + adde $acc0,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc1,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc2,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc3,$acc4,$carry + addze $carry,$zero + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + bne .Loop_mul4x_reduction + + $LD $t0,$SIZE_T*5($tp) # t[4..7] + addze $carry,$carry + $LD $t1,$SIZE_T*6($tp) + $LD $t2,$SIZE_T*7($tp) + $LD $t3,$SIZE_T*8($tp) + $LD $a0,$SIZE_T*1($ap) # a[4..7] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + + $LD $mi,$SIZE_T*8($sp) # t[0]*n0 + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_tail: + $UMULL $t0,$a0,$bi # lo(a[4..7]*b[4]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[4..7]*b[4]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULH $t2,$a2,$bi + adde $acc3,$acc3,$t3 + $UMULH $t3,$a3,$bi + addze $acc4,$zero + $LDX $bi,$bp,$cnt # next b[i] + addc $acc1,$acc1,$t0 + $UMULL $t0,$m0,$mi # lo(n[4..7]*t[0]*n0) + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + addc $acc0,$acc0,$t0 + $UMULH $t0,$m0,$mi # hi(n[4..7]*t[0]*n0) + adde $acc1,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc2,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc3,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc4,$acc4,$carry + addi $mi,$sp,$SIZE_T*8 + $LDX $mi,$mi,$cnt # next a[0]*n0 + addze $carry,$zero + $STU $acc0,$SIZE_T($tp) # word of result + addc $acc0,$acc1,$t0 + adde $acc1,$acc2,$t1 + adde $acc2,$acc3,$t2 + adde $acc3,$acc4,$t3 + #addze $carry,$carry + bne .Loop_mul4x_tail + + $LD $t0,$SIZE_T*5($tp) # next t[i] or topmost carry + sub $t1,$np,$num # rewinded np? + addze $carry,$carry + $UCMP $ap_end,$ap # done yet? + beq .Loop_mul4x_break + + $LD $t1,$SIZE_T*6($tp) + $LD $t2,$SIZE_T*7($tp) + $LD $t3,$SIZE_T*8($tp) + $LD $a0,$SIZE_T*1($ap) + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_break: + $POP $t2,$SIZE_T*6($sp) # pull rp and &b[num-4] + $POP $t3,$SIZE_T*7($sp) + addc $a0,$acc0,$t0 # accumulate topmost carry + $LD $acc0,$SIZE_T*12($sp) # t[0..3] + addze $a1,$acc1 + $LD $acc1,$SIZE_T*13($sp) + addze $a2,$acc2 + $LD $acc2,$SIZE_T*14($sp) + addze $a3,$acc3 + $LD $acc3,$SIZE_T*15($sp) + addze $carry,$carry # topmost carry + $ST $a0,$SIZE_T*1($tp) # result + sub $ap,$ap_end,$num # rewind ap + $ST $a1,$SIZE_T*2($tp) + $ST $a2,$SIZE_T*3($tp) + $ST $a3,$SIZE_T*4($tp) + $ST $carry,$SIZE_T*5($tp) # store topmost carry + + $LD $m0,$SIZE_T*1($t1) # n[0..3] + $LD $m1,$SIZE_T*2($t1) + $LD $m2,$SIZE_T*3($t1) + $LD $m3,$SIZE_T*4($t1) + addi $np,$t1,$SIZE_T*4 + $UCMP $bp,$t3 # done yet? + beq .Lmul4x_post + + $LDU $bi,$SIZE_T*4($bp) + $LD $a0,$SIZE_T*1($ap) # a[0..3] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + li $carry,0 + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + b .Loop_mul4x_reduction + +.align 5 +.Lmul4x_post: + # Final step. We see if result is larger than modulus, and + # if it is, subtract the modulus. But comparison implies + # subtraction. So we subtract modulus, see if it borrowed, + # and conditionally copy original value. + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + mr $bp,$t2 # &rp[-1] + subi $cnt,$cnt,1 + mr $ap_end,$t2 # &rp[-1] copy + subfc $t0,$m0,$acc0 + addi $tp,$sp,$SIZE_T*15 + subfe $t1,$m1,$acc1 + + mtctr $cnt +.Lmul4x_sub: + $LD $m0,$SIZE_T*1($np) + $LD $acc0,$SIZE_T*1($tp) + subfe $t2,$m2,$acc2 + $LD $m1,$SIZE_T*2($np) + $LD $acc1,$SIZE_T*2($tp) + subfe $t3,$m3,$acc3 + $LD $m2,$SIZE_T*3($np) + $LD $acc2,$SIZE_T*3($tp) + $LDU $m3,$SIZE_T*4($np) + $LDU $acc3,$SIZE_T*4($tp) + $ST $t0,$SIZE_T*1($bp) + $ST $t1,$SIZE_T*2($bp) + subfe $t0,$m0,$acc0 + $ST $t2,$SIZE_T*3($bp) + $STU $t3,$SIZE_T*4($bp) + subfe $t1,$m1,$acc1 + bdnz .Lmul4x_sub + + $LD $a0,$SIZE_T*1($ap_end) + $ST $t0,$SIZE_T*1($bp) + $LD $t0,$SIZE_T*12($sp) + subfe $t2,$m2,$acc2 + $LD $a1,$SIZE_T*2($ap_end) + $ST $t1,$SIZE_T*2($bp) + $LD $t1,$SIZE_T*13($sp) + subfe $t3,$m3,$acc3 + subfe $carry,$zero,$carry # did it borrow? + addi $tp,$sp,$SIZE_T*12 + $LD $a2,$SIZE_T*3($ap_end) + $ST $t2,$SIZE_T*3($bp) + $LD $t2,$SIZE_T*14($sp) + $LD $a3,$SIZE_T*4($ap_end) + $ST $t3,$SIZE_T*4($bp) + $LD $t3,$SIZE_T*15($sp) + + mtctr $cnt +.Lmul4x_cond_copy: + and $t0,$t0,$carry + andc $a0,$a0,$carry + $ST $zero,$SIZE_T*0($tp) # wipe stack clean + and $t1,$t1,$carry + andc $a1,$a1,$carry + $ST $zero,$SIZE_T*1($tp) + and $t2,$t2,$carry + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*2($tp) + and $t3,$t3,$carry + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + or $acc0,$t0,$a0 + $LD $a0,$SIZE_T*5($ap_end) + $LD $t0,$SIZE_T*4($tp) + or $acc1,$t1,$a1 + $LD $a1,$SIZE_T*6($ap_end) + $LD $t1,$SIZE_T*5($tp) + or $acc2,$t2,$a2 + $LD $a2,$SIZE_T*7($ap_end) + $LD $t2,$SIZE_T*6($tp) + or $acc3,$t3,$a3 + $LD $a3,$SIZE_T*8($ap_end) + $LD $t3,$SIZE_T*7($tp) + addi $tp,$tp,$SIZE_T*4 + $ST $acc0,$SIZE_T*1($ap_end) + $ST $acc1,$SIZE_T*2($ap_end) + $ST $acc2,$SIZE_T*3($ap_end) + $STU $acc3,$SIZE_T*4($ap_end) + bdnz .Lmul4x_cond_copy + + $POP $bp,0($sp) # pull saved sp + and $t0,$t0,$carry + andc $a0,$a0,$carry + $ST $zero,$SIZE_T*0($tp) + and $t1,$t1,$carry + andc $a1,$a1,$carry + $ST $zero,$SIZE_T*1($tp) + and $t2,$t2,$carry + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*2($tp) + and $t3,$t3,$carry + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + or $acc0,$t0,$a0 + or $acc1,$t1,$a1 + $ST $zero,$SIZE_T*4($tp) + or $acc2,$t2,$a2 + or $acc3,$t3,$a3 + $ST $acc0,$SIZE_T*1($ap_end) + $ST $acc1,$SIZE_T*2($ap_end) + $ST $acc2,$SIZE_T*3($ap_end) + $ST $acc3,$SIZE_T*4($ap_end) + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + $POP $ap,$SIZE_T*6($sp) # pull &rp[-1] + $POP $bp,0($sp) # pull saved sp + addze $carry,$carry # modulo-scheduled + # $acc0-3,$carry hold result, $m0-3 hold modulus + subfc $a0,$m0,$acc0 + subfe $a1,$m1,$acc1 + subfe $a2,$m2,$acc2 + subfe $a3,$m3,$acc3 + subfe $carry,$zero,$carry # did it borrow? + + and $m0,$m0,$carry + and $m1,$m1,$carry + addc $a0,$a0,$m0 + and $m2,$m2,$carry + adde $a1,$a1,$m1 + and $m3,$m3,$carry + adde $a2,$a2,$m2 + adde $a3,$a3,$m3 + + $ST $a0,$SIZE_T*1($ap) # write result + $ST $a1,$SIZE_T*2($ap) + $ST $a2,$SIZE_T*3($ap) + $ST $a3,$SIZE_T*4($ap) + +.Lmul4x_done: + $ST $zero,$SIZE_T*8($sp) # wipe stack clean + $ST $zero,$SIZE_T*9($sp) + $ST $zero,$SIZE_T*10($sp) + $ST $zero,$SIZE_T*11($sp) + li r3,1 # signal "done" + $POP r14,-$SIZE_T*18($bp) + $POP r15,-$SIZE_T*17($bp) + $POP r16,-$SIZE_T*16($bp) + $POP r17,-$SIZE_T*15($bp) + $POP r18,-$SIZE_T*14($bp) + $POP r19,-$SIZE_T*13($bp) + $POP r20,-$SIZE_T*12($bp) + $POP r21,-$SIZE_T*11($bp) + $POP r22,-$SIZE_T*10($bp) + $POP r23,-$SIZE_T*9($bp) + $POP r24,-$SIZE_T*8($bp) + $POP r25,-$SIZE_T*7($bp) + $POP r26,-$SIZE_T*6($bp) + $POP r27,-$SIZE_T*5($bp) + $POP r28,-$SIZE_T*4($bp) + $POP r29,-$SIZE_T*3($bp) + $POP r30,-$SIZE_T*2($bp) + $POP r31,-$SIZE_T*1($bp) + mr $sp,$bp + blr + .long 0 + .byte 0,12,4,0x20,0x80,18,6,0 + .long 0 +.size .bn_mul4x_mont_int,.-.bn_mul4x_mont_int +___ +} + +if (1) { +######################################################################## +# Following is PPC adaptation of sqrx8x_mont from x86_64-mont5 module. + +my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("r$_",(9..12,14..17)); +my ($t0,$t1,$t2,$t3)=map("r$_",(18..21)); +my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("r$_",(22..29)); +my ($cnt,$carry,$zero)=("r30","r31","r0"); +my ($tp,$ap_end,$na0)=($bp,$np,$carry); + +# sp----------->+-------------------------------+ +# | saved sp | +# +-------------------------------+ +# . . +# +12*size_t +-------------------------------+ +# | size_t tmp[2*num] | +# . . +# . . +# . . +# +-------------------------------+ +# . . +# -18*size_t +-------------------------------+ +# | 18 saved gpr, r14-r31 | +# . . +# . . +# +-------------------------------+ +$code.=<<___; +.align 5 +__bn_sqr8x_mont: +.Lsqr8x_do: + mr $a0,$sp + slwi $a1,$num,`log($SIZE_T)/log(2)+1` + li $a2,-32*$SIZE_T + sub $a1,$a2,$a1 + slwi $num,$num,`log($SIZE_T)/log(2)` + $STUX $sp,$sp,$a1 # alloca + + $PUSH r14,-$SIZE_T*18($a0) + $PUSH r15,-$SIZE_T*17($a0) + $PUSH r16,-$SIZE_T*16($a0) + $PUSH r17,-$SIZE_T*15($a0) + $PUSH r18,-$SIZE_T*14($a0) + $PUSH r19,-$SIZE_T*13($a0) + $PUSH r20,-$SIZE_T*12($a0) + $PUSH r21,-$SIZE_T*11($a0) + $PUSH r22,-$SIZE_T*10($a0) + $PUSH r23,-$SIZE_T*9($a0) + $PUSH r24,-$SIZE_T*8($a0) + $PUSH r25,-$SIZE_T*7($a0) + $PUSH r26,-$SIZE_T*6($a0) + $PUSH r27,-$SIZE_T*5($a0) + $PUSH r28,-$SIZE_T*4($a0) + $PUSH r29,-$SIZE_T*3($a0) + $PUSH r30,-$SIZE_T*2($a0) + $PUSH r31,-$SIZE_T*1($a0) + + subi $ap,$ap,$SIZE_T # bias by -1 + subi $t0,$np,$SIZE_T # bias by -1 + subi $rp,$rp,$SIZE_T # bias by -1 + $LD $n0,0($n0) # *n0 + li $zero,0 + + add $ap_end,$ap,$num + $LD $a0,$SIZE_T*1($ap) + #li $acc0,0 + $LD $a1,$SIZE_T*2($ap) + li $acc1,0 + $LD $a2,$SIZE_T*3($ap) + li $acc2,0 + $LD $a3,$SIZE_T*4($ap) + li $acc3,0 + $LD $a4,$SIZE_T*5($ap) + li $acc4,0 + $LD $a5,$SIZE_T*6($ap) + li $acc5,0 + $LD $a6,$SIZE_T*7($ap) + li $acc6,0 + $LDU $a7,$SIZE_T*8($ap) + li $acc7,0 + + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + subic. $cnt,$num,$SIZE_T*8 + b .Lsqr8x_zero_start + +.align 5 +.Lsqr8x_zero: + subic. $cnt,$cnt,$SIZE_T*8 + $ST $zero,$SIZE_T*1($tp) + $ST $zero,$SIZE_T*2($tp) + $ST $zero,$SIZE_T*3($tp) + $ST $zero,$SIZE_T*4($tp) + $ST $zero,$SIZE_T*5($tp) + $ST $zero,$SIZE_T*6($tp) + $ST $zero,$SIZE_T*7($tp) + $ST $zero,$SIZE_T*8($tp) +.Lsqr8x_zero_start: + $ST $zero,$SIZE_T*9($tp) + $ST $zero,$SIZE_T*10($tp) + $ST $zero,$SIZE_T*11($tp) + $ST $zero,$SIZE_T*12($tp) + $ST $zero,$SIZE_T*13($tp) + $ST $zero,$SIZE_T*14($tp) + $ST $zero,$SIZE_T*15($tp) + $STU $zero,$SIZE_T*16($tp) + bne .Lsqr8x_zero + + $PUSH $rp,$SIZE_T*6($sp) # offload &rp[-1] + $PUSH $t0,$SIZE_T*7($sp) # offload &np[-1] + $PUSH $n0,$SIZE_T*8($sp) # offload n0 + $PUSH $tp,$SIZE_T*9($sp) # &tp[2*num-1] + $PUSH $zero,$SIZE_T*10($sp) # initial top-most carry + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + + # Multiply everything but a[i]*a[i] +.align 5 +.Lsqr8x_outer_loop: + # a[1]a[0] (i) + # a[2]a[0] + # a[3]a[0] + # a[4]a[0] + # a[5]a[0] + # a[6]a[0] + # a[7]a[0] + # a[2]a[1] (ii) + # a[3]a[1] + # a[4]a[1] + # a[5]a[1] + # a[6]a[1] + # a[7]a[1] + # a[3]a[2] (iii) + # a[4]a[2] + # a[5]a[2] + # a[6]a[2] + # a[7]a[2] + # a[4]a[3] (iv) + # a[5]a[3] + # a[6]a[3] + # a[7]a[3] + # a[5]a[4] (v) + # a[6]a[4] + # a[7]a[4] + # a[6]a[5] (vi) + # a[7]a[5] + # a[7]a[6] (vii) + + $UMULL $t0,$a1,$a0 # lo(a[1..7]*a[0]) (i) + $UMULL $t1,$a2,$a0 + $UMULL $t2,$a3,$a0 + $UMULL $t3,$a4,$a0 + addc $acc1,$acc1,$t0 # t[1]+lo(a[1]*a[0]) + $UMULL $t0,$a5,$a0 + adde $acc2,$acc2,$t1 + $UMULL $t1,$a6,$a0 + adde $acc3,$acc3,$t2 + $UMULL $t2,$a7,$a0 + adde $acc4,$acc4,$t3 + $UMULH $t3,$a1,$a0 # hi(a[1..7]*a[0]) + adde $acc5,$acc5,$t0 + $UMULH $t0,$a2,$a0 + adde $acc6,$acc6,$t1 + $UMULH $t1,$a3,$a0 + adde $acc7,$acc7,$t2 + $UMULH $t2,$a4,$a0 + $ST $acc0,$SIZE_T*1($tp) # t[0] + addze $acc0,$zero # t[8] + $ST $acc1,$SIZE_T*2($tp) # t[1] + addc $acc2,$acc2,$t3 # t[2]+lo(a[1]*a[0]) + $UMULH $t3,$a5,$a0 + adde $acc3,$acc3,$t0 + $UMULH $t0,$a6,$a0 + adde $acc4,$acc4,$t1 + $UMULH $t1,$a7,$a0 + adde $acc5,$acc5,$t2 + $UMULL $t2,$a2,$a1 # lo(a[2..7]*a[1]) (ii) + adde $acc6,$acc6,$t3 + $UMULL $t3,$a3,$a1 + adde $acc7,$acc7,$t0 + $UMULL $t0,$a4,$a1 + adde $acc0,$acc0,$t1 + + $UMULL $t1,$a5,$a1 + addc $acc3,$acc3,$t2 + $UMULL $t2,$a6,$a1 + adde $acc4,$acc4,$t3 + $UMULL $t3,$a7,$a1 + adde $acc5,$acc5,$t0 + $UMULH $t0,$a2,$a1 # hi(a[2..7]*a[1]) + adde $acc6,$acc6,$t1 + $UMULH $t1,$a3,$a1 + adde $acc7,$acc7,$t2 + $UMULH $t2,$a4,$a1 + adde $acc0,$acc0,$t3 + $UMULH $t3,$a5,$a1 + $ST $acc2,$SIZE_T*3($tp) # t[2] + addze $acc1,$zero # t[9] + $ST $acc3,$SIZE_T*4($tp) # t[3] + addc $acc4,$acc4,$t0 + $UMULH $t0,$a6,$a1 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a7,$a1 + adde $acc6,$acc6,$t2 + $UMULL $t2,$a3,$a2 # lo(a[3..7]*a[2]) (iii) + adde $acc7,$acc7,$t3 + $UMULL $t3,$a4,$a2 + adde $acc0,$acc0,$t0 + $UMULL $t0,$a5,$a2 + adde $acc1,$acc1,$t1 + + $UMULL $t1,$a6,$a2 + addc $acc5,$acc5,$t2 + $UMULL $t2,$a7,$a2 + adde $acc6,$acc6,$t3 + $UMULH $t3,$a3,$a2 # hi(a[3..7]*a[2]) + adde $acc7,$acc7,$t0 + $UMULH $t0,$a4,$a2 + adde $acc0,$acc0,$t1 + $UMULH $t1,$a5,$a2 + adde $acc1,$acc1,$t2 + $UMULH $t2,$a6,$a2 + $ST $acc4,$SIZE_T*5($tp) # t[4] + addze $acc2,$zero # t[10] + $ST $acc5,$SIZE_T*6($tp) # t[5] + addc $acc6,$acc6,$t3 + $UMULH $t3,$a7,$a2 + adde $acc7,$acc7,$t0 + $UMULL $t0,$a4,$a3 # lo(a[4..7]*a[3]) (iv) + adde $acc0,$acc0,$t1 + $UMULL $t1,$a5,$a3 + adde $acc1,$acc1,$t2 + $UMULL $t2,$a6,$a3 + adde $acc2,$acc2,$t3 + + $UMULL $t3,$a7,$a3 + addc $acc7,$acc7,$t0 + $UMULH $t0,$a4,$a3 # hi(a[4..7]*a[3]) + adde $acc0,$acc0,$t1 + $UMULH $t1,$a5,$a3 + adde $acc1,$acc1,$t2 + $UMULH $t2,$a6,$a3 + adde $acc2,$acc2,$t3 + $UMULH $t3,$a7,$a3 + $ST $acc6,$SIZE_T*7($tp) # t[6] + addze $acc3,$zero # t[11] + $STU $acc7,$SIZE_T*8($tp) # t[7] + addc $acc0,$acc0,$t0 + $UMULL $t0,$a5,$a4 # lo(a[5..7]*a[4]) (v) + adde $acc1,$acc1,$t1 + $UMULL $t1,$a6,$a4 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a7,$a4 + adde $acc3,$acc3,$t3 + + $UMULH $t3,$a5,$a4 # hi(a[5..7]*a[4]) + addc $acc1,$acc1,$t0 + $UMULH $t0,$a6,$a4 + adde $acc2,$acc2,$t1 + $UMULH $t1,$a7,$a4 + adde $acc3,$acc3,$t2 + $UMULL $t2,$a6,$a5 # lo(a[6..7]*a[5]) (vi) + addze $acc4,$zero # t[12] + addc $acc2,$acc2,$t3 + $UMULL $t3,$a7,$a5 + adde $acc3,$acc3,$t0 + $UMULH $t0,$a6,$a5 # hi(a[6..7]*a[5]) + adde $acc4,$acc4,$t1 + + $UMULH $t1,$a7,$a5 + addc $acc3,$acc3,$t2 + $UMULL $t2,$a7,$a6 # lo(a[7]*a[6]) (vii) + adde $acc4,$acc4,$t3 + $UMULH $t3,$a7,$a6 # hi(a[7]*a[6]) + addze $acc5,$zero # t[13] + addc $acc4,$acc4,$t0 + $UCMP $ap_end,$ap # done yet? + adde $acc5,$acc5,$t1 + + addc $acc5,$acc5,$t2 + sub $t0,$ap_end,$num # rewinded ap + addze $acc6,$zero # t[14] + add $acc6,$acc6,$t3 + + beq .Lsqr8x_outer_break + + mr $n0,$a0 + $LD $a0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($tp) + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($ap) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($ap) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($ap) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($ap) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($ap) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($ap) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($ap) + subi $rp,$ap,$SIZE_T*7 + addze $acc7,$a7 + $LDU $a7,$SIZE_T*8($ap) + #addze $carry,$zero # moved below + li $cnt,0 + b .Lsqr8x_mul + + # a[8]a[0] + # a[9]a[0] + # a[a]a[0] + # a[b]a[0] + # a[c]a[0] + # a[d]a[0] + # a[e]a[0] + # a[f]a[0] + # a[8]a[1] + # a[f]a[1]........................ + # a[8]a[2] + # a[f]a[2]........................ + # a[8]a[3] + # a[f]a[3]........................ + # a[8]a[4] + # a[f]a[4]........................ + # a[8]a[5] + # a[f]a[5]........................ + # a[8]a[6] + # a[f]a[6]........................ + # a[8]a[7] + # a[f]a[7]........................ +.align 5 +.Lsqr8x_mul: + $UMULL $t0,$a0,$n0 + addze $carry,$zero # carry bit, modulo-scheduled + $UMULL $t1,$a1,$n0 + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$n0 + andi. $cnt,$cnt,$SIZE_T*8-1 + $UMULL $t3,$a3,$n0 + addc $acc0,$acc0,$t0 + $UMULL $t0,$a4,$n0 + adde $acc1,$acc1,$t1 + $UMULL $t1,$a5,$n0 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a6,$n0 + adde $acc3,$acc3,$t3 + $UMULL $t3,$a7,$n0 + adde $acc4,$acc4,$t0 + $UMULH $t0,$a0,$n0 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a1,$n0 + adde $acc6,$acc6,$t2 + $UMULH $t2,$a2,$n0 + adde $acc7,$acc7,$t3 + $UMULH $t3,$a3,$n0 + addze $carry,$carry + $STU $acc0,$SIZE_T($tp) + addc $acc0,$acc1,$t0 + $UMULH $t0,$a4,$n0 + adde $acc1,$acc2,$t1 + $UMULH $t1,$a5,$n0 + adde $acc2,$acc3,$t2 + $UMULH $t2,$a6,$n0 + adde $acc3,$acc4,$t3 + $UMULH $t3,$a7,$n0 + $LDX $n0,$rp,$cnt + adde $acc4,$acc5,$t0 + adde $acc5,$acc6,$t1 + adde $acc6,$acc7,$t2 + adde $acc7,$carry,$t3 + #addze $carry,$zero # moved above + bne .Lsqr8x_mul + # note that carry flag is guaranteed + # to be zero at this point + $UCMP $ap,$ap_end # done yet? + beq .Lsqr8x_break + + $LD $a0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($tp) + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($ap) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($ap) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($ap) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($ap) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($ap) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($ap) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($ap) + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*8($ap) + #addze $carry,$zero # moved above + b .Lsqr8x_mul + +.align 5 +.Lsqr8x_break: + $LD $a0,$SIZE_T*8($rp) + addi $ap,$rp,$SIZE_T*15 + $LD $a1,$SIZE_T*9($rp) + sub. $t0,$ap_end,$ap # is it last iteration? + $LD $a2,$SIZE_T*10($rp) + sub $t1,$tp,$t0 + $LD $a3,$SIZE_T*11($rp) + $LD $a4,$SIZE_T*12($rp) + $LD $a5,$SIZE_T*13($rp) + $LD $a6,$SIZE_T*14($rp) + $LD $a7,$SIZE_T*15($rp) + beq .Lsqr8x_outer_loop + + $ST $acc0,$SIZE_T*1($tp) + $LD $acc0,$SIZE_T*1($t1) + $ST $acc1,$SIZE_T*2($tp) + $LD $acc1,$SIZE_T*2($t1) + $ST $acc2,$SIZE_T*3($tp) + $LD $acc2,$SIZE_T*3($t1) + $ST $acc3,$SIZE_T*4($tp) + $LD $acc3,$SIZE_T*4($t1) + $ST $acc4,$SIZE_T*5($tp) + $LD $acc4,$SIZE_T*5($t1) + $ST $acc5,$SIZE_T*6($tp) + $LD $acc5,$SIZE_T*6($t1) + $ST $acc6,$SIZE_T*7($tp) + $LD $acc6,$SIZE_T*7($t1) + $ST $acc7,$SIZE_T*8($tp) + $LD $acc7,$SIZE_T*8($t1) + mr $tp,$t1 + b .Lsqr8x_outer_loop + +.align 5 +.Lsqr8x_outer_break: + #################################################################### + # Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + $LD $a1,$SIZE_T*1($t0) # recall that $t0 is &a[-1] + $LD $a3,$SIZE_T*2($t0) + $LD $a5,$SIZE_T*3($t0) + $LD $a7,$SIZE_T*4($t0) + addi $ap,$t0,$SIZE_T*4 + # "tp[x]" comments are for num==8 case + $LD $t1,$SIZE_T*13($sp) # =tp[1], t[0] is not interesting + $LD $t2,$SIZE_T*14($sp) + $LD $t3,$SIZE_T*15($sp) + $LD $t0,$SIZE_T*16($sp) + + $ST $acc0,$SIZE_T*1($tp) # tp[8]= + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + $ST $acc1,$SIZE_T*2($tp) + subi $cnt,$cnt,1 + $ST $acc2,$SIZE_T*3($tp) + $ST $acc3,$SIZE_T*4($tp) + $ST $acc4,$SIZE_T*5($tp) + $ST $acc5,$SIZE_T*6($tp) + $ST $acc6,$SIZE_T*7($tp) + #$ST $acc7,$SIZE_T*8($tp) # tp[15] is not interesting + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + $UMULL $acc0,$a1,$a1 + $UMULH $a1,$a1,$a1 + add $acc1,$t1,$t1 # <<1 + $SHRI $t1,$t1,$BITS-1 + $UMULL $a2,$a3,$a3 + $UMULH $a3,$a3,$a3 + addc $acc1,$acc1,$a1 + add $acc2,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + add $acc3,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc2,$acc2,$t1 + + mtctr $cnt +.Lsqr4x_shift_n_add: + $UMULL $a4,$a5,$a5 + $UMULH $a5,$a5,$a5 + $LD $t1,$SIZE_T*6($tp) # =tp[5] + $LD $a1,$SIZE_T*1($ap) + adde $acc2,$acc2,$a2 + add $acc4,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc3,$acc3,$t2 + $LD $t2,$SIZE_T*7($tp) # =tp[6] + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*2($ap) + add $acc5,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc4,$acc4,$t3 + $LD $t3,$SIZE_T*8($tp) # =tp[7] + $UMULL $a6,$a7,$a7 + $UMULH $a7,$a7,$a7 + adde $acc4,$acc4,$a4 + add $acc6,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc5,$acc5,$t0 + $LD $t0,$SIZE_T*9($tp) # =tp[8] + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*3($ap) + add $acc7,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc6,$acc6,$t1 + $LD $t1,$SIZE_T*10($tp) # =tp[9] + $UMULL $a0,$a1,$a1 + $UMULH $a1,$a1,$a1 + adde $acc6,$acc6,$a6 + $ST $acc0,$SIZE_T*1($tp) # tp[0]= + add $acc0,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc7,$acc7,$t2 + $LD $t2,$SIZE_T*11($tp) # =tp[10] + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*4($ap) + $ST $acc1,$SIZE_T*2($tp) # tp[1]= + add $acc1,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc0,$acc0,$t3 + $LD $t3,$SIZE_T*12($tp) # =tp[11] + $UMULL $a2,$a3,$a3 + $UMULH $a3,$a3,$a3 + adde $acc0,$acc0,$a0 + $ST $acc2,$SIZE_T*3($tp) # tp[2]= + add $acc2,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc1,$acc1,$t0 + $LD $t0,$SIZE_T*13($tp) # =tp[12] + adde $acc1,$acc1,$a1 + $ST $acc3,$SIZE_T*4($tp) # tp[3]= + $ST $acc4,$SIZE_T*5($tp) # tp[4]= + $ST $acc5,$SIZE_T*6($tp) # tp[5]= + $ST $acc6,$SIZE_T*7($tp) # tp[6]= + $STU $acc7,$SIZE_T*8($tp) # tp[7]= + add $acc3,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc2,$acc2,$t1 + bdnz .Lsqr4x_shift_n_add +___ +my ($np,$np_end)=($ap,$ap_end); +$code.=<<___; + $POP $np,$SIZE_T*7($sp) # pull &np[-1] and n0 + $POP $n0,$SIZE_T*8($sp) + $UMULL $a4,$a5,$a5 + $UMULH $a5,$a5,$a5 + $ST $acc0,$SIZE_T*1($tp) # tp[8]= + $LD $acc0,$SIZE_T*12($sp) # =tp[0] + $LD $t1,$SIZE_T*6($tp) # =tp[13] + adde $acc2,$acc2,$a2 + add $acc4,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc3,$acc3,$t2 + $LD $t2,$SIZE_T*7($tp) # =tp[14] + adde $acc3,$acc3,$a3 + add $acc5,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc4,$acc4,$t3 + $UMULL $a6,$a7,$a7 + $UMULH $a7,$a7,$a7 + adde $acc4,$acc4,$a4 + add $acc6,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc5,$acc5,$t0 + $ST $acc1,$SIZE_T*2($tp) # tp[9]= + $LD $acc1,$SIZE_T*13($sp) # =tp[1] + adde $acc5,$acc5,$a5 + or $acc6,$acc6,$t1 + $LD $a0,$SIZE_T*1($np) + $LD $a1,$SIZE_T*2($np) + adde $acc6,$acc6,$a6 + $LD $a2,$SIZE_T*3($np) + $LD $a3,$SIZE_T*4($np) + adde $acc7,$a7,$t2 + $LD $a4,$SIZE_T*5($np) + $LD $a5,$SIZE_T*6($np) + + ################################################################ + # Reduce by 8 limbs per iteration + $UMULL $na0,$n0,$acc0 # t[0]*n0 + li $cnt,8 + $LD $a6,$SIZE_T*7($np) + add $np_end,$np,$num + $LDU $a7,$SIZE_T*8($np) + $ST $acc2,$SIZE_T*3($tp) # tp[10]= + $LD $acc2,$SIZE_T*14($sp) + $ST $acc3,$SIZE_T*4($tp) # tp[11]= + $LD $acc3,$SIZE_T*15($sp) + $ST $acc4,$SIZE_T*5($tp) # tp[12]= + $LD $acc4,$SIZE_T*16($sp) + $ST $acc5,$SIZE_T*6($tp) # tp[13]= + $LD $acc5,$SIZE_T*17($sp) + $ST $acc6,$SIZE_T*7($tp) # tp[14]= + $LD $acc6,$SIZE_T*18($sp) + $ST $acc7,$SIZE_T*8($tp) # tp[15]= + $LD $acc7,$SIZE_T*19($sp) + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + mtctr $cnt + b .Lsqr8x_reduction + +.align 5 +.Lsqr8x_reduction: + # (*) $UMULL $t0,$a0,$na0 # lo(n[0-7])*lo(t[0]*n0) + $UMULL $t1,$a1,$na0 + $UMULL $t2,$a2,$na0 + $STU $na0,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + $UMULL $t3,$a3,$na0 + # (*) addc $acc0,$acc0,$t0 + addic $acc0,$acc0,-1 # (*) + $UMULL $t0,$a4,$na0 + adde $acc0,$acc1,$t1 + $UMULL $t1,$a5,$na0 + adde $acc1,$acc2,$t2 + $UMULL $t2,$a6,$na0 + adde $acc2,$acc3,$t3 + $UMULL $t3,$a7,$na0 + adde $acc3,$acc4,$t0 + $UMULH $t0,$a0,$na0 # hi(n[0-7])*lo(t[0]*n0) + adde $acc4,$acc5,$t1 + $UMULH $t1,$a1,$na0 + adde $acc5,$acc6,$t2 + $UMULH $t2,$a2,$na0 + adde $acc6,$acc7,$t3 + $UMULH $t3,$a3,$na0 + addze $acc7,$zero + addc $acc0,$acc0,$t0 + $UMULH $t0,$a4,$na0 + adde $acc1,$acc1,$t1 + $UMULH $t1,$a5,$na0 + adde $acc2,$acc2,$t2 + $UMULH $t2,$a6,$na0 + adde $acc3,$acc3,$t3 + $UMULH $t3,$a7,$na0 + $UMULL $na0,$n0,$acc0 # next t[0]*n0 + adde $acc4,$acc4,$t0 + adde $acc5,$acc5,$t1 + adde $acc6,$acc6,$t2 + adde $acc7,$acc7,$t3 + bdnz .Lsqr8x_reduction + + $LD $t0,$SIZE_T*1($tp) + $LD $t1,$SIZE_T*2($tp) + $LD $t2,$SIZE_T*3($tp) + $LD $t3,$SIZE_T*4($tp) + subi $rp,$tp,$SIZE_T*7 + $UCMP $np_end,$np # done yet? + addc $acc0,$acc0,$t0 + $LD $t0,$SIZE_T*5($tp) + adde $acc1,$acc1,$t1 + $LD $t1,$SIZE_T*6($tp) + adde $acc2,$acc2,$t2 + $LD $t2,$SIZE_T*7($tp) + adde $acc3,$acc3,$t3 + $LD $t3,$SIZE_T*8($tp) + adde $acc4,$acc4,$t0 + adde $acc5,$acc5,$t1 + adde $acc6,$acc6,$t2 + adde $acc7,$acc7,$t3 + #addze $carry,$zero # moved below + beq .Lsqr8x8_post_condition + + $LD $n0,$SIZE_T*0($rp) + $LD $a0,$SIZE_T*1($np) + $LD $a1,$SIZE_T*2($np) + $LD $a2,$SIZE_T*3($np) + $LD $a3,$SIZE_T*4($np) + $LD $a4,$SIZE_T*5($np) + $LD $a5,$SIZE_T*6($np) + $LD $a6,$SIZE_T*7($np) + $LDU $a7,$SIZE_T*8($np) + li $cnt,0 + +.align 5 +.Lsqr8x_tail: + $UMULL $t0,$a0,$n0 + addze $carry,$zero # carry bit, modulo-scheduled + $UMULL $t1,$a1,$n0 + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$n0 + andi. $cnt,$cnt,$SIZE_T*8-1 + $UMULL $t3,$a3,$n0 + addc $acc0,$acc0,$t0 + $UMULL $t0,$a4,$n0 + adde $acc1,$acc1,$t1 + $UMULL $t1,$a5,$n0 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a6,$n0 + adde $acc3,$acc3,$t3 + $UMULL $t3,$a7,$n0 + adde $acc4,$acc4,$t0 + $UMULH $t0,$a0,$n0 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a1,$n0 + adde $acc6,$acc6,$t2 + $UMULH $t2,$a2,$n0 + adde $acc7,$acc7,$t3 + $UMULH $t3,$a3,$n0 + addze $carry,$carry + $STU $acc0,$SIZE_T($tp) + addc $acc0,$acc1,$t0 + $UMULH $t0,$a4,$n0 + adde $acc1,$acc2,$t1 + $UMULH $t1,$a5,$n0 + adde $acc2,$acc3,$t2 + $UMULH $t2,$a6,$n0 + adde $acc3,$acc4,$t3 + $UMULH $t3,$a7,$n0 + $LDX $n0,$rp,$cnt + adde $acc4,$acc5,$t0 + adde $acc5,$acc6,$t1 + adde $acc6,$acc7,$t2 + adde $acc7,$carry,$t3 + #addze $carry,$zero # moved above + bne .Lsqr8x_tail + # note that carry flag is guaranteed + # to be zero at this point + $LD $a0,$SIZE_T*1($tp) + $POP $carry,$SIZE_T*10($sp) # pull top-most carry in case we break + $UCMP $np_end,$np # done yet? + $LD $a1,$SIZE_T*2($tp) + sub $t2,$np_end,$num # rewinded np + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + beq .Lsqr8x_tail_break + + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($np) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($np) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($np) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($np) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($np) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($np) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($np) + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*8($np) + #addze $carry,$zero # moved above + b .Lsqr8x_tail + +.align 5 +.Lsqr8x_tail_break: + $POP $n0,$SIZE_T*8($sp) # pull n0 + $POP $t3,$SIZE_T*9($sp) # &tp[2*num-1] + addi $cnt,$tp,$SIZE_T*8 # end of current t[num] window + + addic $carry,$carry,-1 # "move" top-most carry to carry bit + adde $t0,$acc0,$a0 + $LD $acc0,$SIZE_T*8($rp) + $LD $a0,$SIZE_T*1($t2) # recall that $t2 is &n[-1] + adde $t1,$acc1,$a1 + $LD $acc1,$SIZE_T*9($rp) + $LD $a1,$SIZE_T*2($t2) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($t2) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($t2) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($t2) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($t2) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($t2) + adde $acc7,$acc7,$a7 + $LD $a7,$SIZE_T*8($t2) + addi $np,$t2,$SIZE_T*8 + addze $t2,$zero # top-most carry + $UMULL $na0,$n0,$acc0 + $ST $t0,$SIZE_T*1($tp) + $UCMP $cnt,$t3 # did we hit the bottom? + $ST $t1,$SIZE_T*2($tp) + li $cnt,8 + $ST $acc2,$SIZE_T*3($tp) + $LD $acc2,$SIZE_T*10($rp) + $ST $acc3,$SIZE_T*4($tp) + $LD $acc3,$SIZE_T*11($rp) + $ST $acc4,$SIZE_T*5($tp) + $LD $acc4,$SIZE_T*12($rp) + $ST $acc5,$SIZE_T*6($tp) + $LD $acc5,$SIZE_T*13($rp) + $ST $acc6,$SIZE_T*7($tp) + $LD $acc6,$SIZE_T*14($rp) + $ST $acc7,$SIZE_T*8($tp) + $LD $acc7,$SIZE_T*15($rp) + $PUSH $t2,$SIZE_T*10($sp) # off-load top-most carry + addi $tp,$rp,$SIZE_T*7 # slide the window + mtctr $cnt + bne .Lsqr8x_reduction + + ################################################################ + # Final step. We see if result is larger than modulus, and + # if it is, subtract the modulus. But comparison implies + # subtraction. So we subtract modulus, see if it borrowed, + # and conditionally copy original value. + $POP $rp,$SIZE_T*6($sp) # pull &rp[-1] + srwi $cnt,$num,`log($SIZE_T)/log(2)+3` + mr $n0,$tp # put tp aside + addi $tp,$tp,$SIZE_T*8 + subi $cnt,$cnt,1 + subfc $t0,$a0,$acc0 + subfe $t1,$a1,$acc1 + mr $carry,$t2 + mr $ap_end,$rp # $rp copy + + mtctr $cnt + b .Lsqr8x_sub + +.align 5 +.Lsqr8x_sub: + $LD $a0,$SIZE_T*1($np) + $LD $acc0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($np) + $LD $acc1,$SIZE_T*2($tp) + subfe $t2,$a2,$acc2 + $LD $a2,$SIZE_T*3($np) + $LD $acc2,$SIZE_T*3($tp) + subfe $t3,$a3,$acc3 + $LD $a3,$SIZE_T*4($np) + $LD $acc3,$SIZE_T*4($tp) + $ST $t0,$SIZE_T*1($rp) + subfe $t0,$a4,$acc4 + $LD $a4,$SIZE_T*5($np) + $LD $acc4,$SIZE_T*5($tp) + $ST $t1,$SIZE_T*2($rp) + subfe $t1,$a5,$acc5 + $LD $a5,$SIZE_T*6($np) + $LD $acc5,$SIZE_T*6($tp) + $ST $t2,$SIZE_T*3($rp) + subfe $t2,$a6,$acc6 + $LD $a6,$SIZE_T*7($np) + $LD $acc6,$SIZE_T*7($tp) + $ST $t3,$SIZE_T*4($rp) + subfe $t3,$a7,$acc7 + $LDU $a7,$SIZE_T*8($np) + $LDU $acc7,$SIZE_T*8($tp) + $ST $t0,$SIZE_T*5($rp) + subfe $t0,$a0,$acc0 + $ST $t1,$SIZE_T*6($rp) + subfe $t1,$a1,$acc1 + $ST $t2,$SIZE_T*7($rp) + $STU $t3,$SIZE_T*8($rp) + bdnz .Lsqr8x_sub + + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + $LD $a0,$SIZE_T*1($ap_end) # original $rp + $LD $acc0,$SIZE_T*1($n0) # original $tp + subi $cnt,$cnt,1 + $LD $a1,$SIZE_T*2($ap_end) + $LD $acc1,$SIZE_T*2($n0) + subfe $t2,$a2,$acc2 + $LD $a2,$SIZE_T*3($ap_end) + $LD $acc2,$SIZE_T*3($n0) + subfe $t3,$a3,$acc3 + $LD $a3,$SIZE_T*4($ap_end) + $LDU $acc3,$SIZE_T*4($n0) + $ST $t0,$SIZE_T*1($rp) + subfe $t0,$a4,$acc4 + $ST $t1,$SIZE_T*2($rp) + subfe $t1,$a5,$acc5 + $ST $t2,$SIZE_T*3($rp) + subfe $t2,$a6,$acc6 + $ST $t3,$SIZE_T*4($rp) + subfe $t3,$a7,$acc7 + $ST $t0,$SIZE_T*5($rp) + subfe $carry,$zero,$carry # did it borrow? + $ST $t1,$SIZE_T*6($rp) + $ST $t2,$SIZE_T*7($rp) + $ST $t3,$SIZE_T*8($rp) + + addi $tp,$sp,$SIZE_T*11 + mtctr $cnt + +.Lsqr4x_cond_copy: + andc $a0,$a0,$carry + $ST $zero,-$SIZE_T*3($n0) # wipe stack clean + and $acc0,$acc0,$carry + $ST $zero,-$SIZE_T*2($n0) + andc $a1,$a1,$carry + $ST $zero,-$SIZE_T*1($n0) + and $acc1,$acc1,$carry + $ST $zero,-$SIZE_T*0($n0) + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*1($tp) + and $acc2,$acc2,$carry + $ST $zero,$SIZE_T*2($tp) + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + and $acc3,$acc3,$carry + $STU $zero,$SIZE_T*4($tp) + or $t0,$a0,$acc0 + $LD $a0,$SIZE_T*5($ap_end) + $LD $acc0,$SIZE_T*1($n0) + or $t1,$a1,$acc1 + $LD $a1,$SIZE_T*6($ap_end) + $LD $acc1,$SIZE_T*2($n0) + or $t2,$a2,$acc2 + $LD $a2,$SIZE_T*7($ap_end) + $LD $acc2,$SIZE_T*3($n0) + or $t3,$a3,$acc3 + $LD $a3,$SIZE_T*8($ap_end) + $LDU $acc3,$SIZE_T*4($n0) + $ST $t0,$SIZE_T*1($ap_end) + $ST $t1,$SIZE_T*2($ap_end) + $ST $t2,$SIZE_T*3($ap_end) + $STU $t3,$SIZE_T*4($ap_end) + bdnz .Lsqr4x_cond_copy + + $POP $ap,0($sp) # pull saved sp + andc $a0,$a0,$carry + and $acc0,$acc0,$carry + andc $a1,$a1,$carry + and $acc1,$acc1,$carry + andc $a2,$a2,$carry + and $acc2,$acc2,$carry + andc $a3,$a3,$carry + and $acc3,$acc3,$carry + or $t0,$a0,$acc0 + or $t1,$a1,$acc1 + or $t2,$a2,$acc2 + or $t3,$a3,$acc3 + $ST $t0,$SIZE_T*1($ap_end) + $ST $t1,$SIZE_T*2($ap_end) + $ST $t2,$SIZE_T*3($ap_end) + $ST $t3,$SIZE_T*4($ap_end) + + b .Lsqr8x_done + +.align 5 +.Lsqr8x8_post_condition: + $POP $rp,$SIZE_T*6($sp) # pull rp + $POP $ap,0($sp) # pull saved sp + addze $carry,$zero + + # $acc0-7,$carry hold result, $a0-7 hold modulus + subfc $acc0,$a0,$acc0 + subfe $acc1,$a1,$acc1 + $ST $zero,$SIZE_T*12($sp) # wipe stack clean + $ST $zero,$SIZE_T*13($sp) + subfe $acc2,$a2,$acc2 + $ST $zero,$SIZE_T*14($sp) + $ST $zero,$SIZE_T*15($sp) + subfe $acc3,$a3,$acc3 + $ST $zero,$SIZE_T*16($sp) + $ST $zero,$SIZE_T*17($sp) + subfe $acc4,$a4,$acc4 + $ST $zero,$SIZE_T*18($sp) + $ST $zero,$SIZE_T*19($sp) + subfe $acc5,$a5,$acc5 + $ST $zero,$SIZE_T*20($sp) + $ST $zero,$SIZE_T*21($sp) + subfe $acc6,$a6,$acc6 + $ST $zero,$SIZE_T*22($sp) + $ST $zero,$SIZE_T*23($sp) + subfe $acc7,$a7,$acc7 + $ST $zero,$SIZE_T*24($sp) + $ST $zero,$SIZE_T*25($sp) + subfe $carry,$zero,$carry # did it borrow? + $ST $zero,$SIZE_T*26($sp) + $ST $zero,$SIZE_T*27($sp) + + and $a0,$a0,$carry + and $a1,$a1,$carry + addc $acc0,$acc0,$a0 # add modulus back if borrowed + and $a2,$a2,$carry + adde $acc1,$acc1,$a1 + and $a3,$a3,$carry + adde $acc2,$acc2,$a2 + and $a4,$a4,$carry + adde $acc3,$acc3,$a3 + and $a5,$a5,$carry + adde $acc4,$acc4,$a4 + and $a6,$a6,$carry + adde $acc5,$acc5,$a5 + and $a7,$a7,$carry + adde $acc6,$acc6,$a6 + adde $acc7,$acc7,$a7 + $ST $acc0,$SIZE_T*1($rp) + $ST $acc1,$SIZE_T*2($rp) + $ST $acc2,$SIZE_T*3($rp) + $ST $acc3,$SIZE_T*4($rp) + $ST $acc4,$SIZE_T*5($rp) + $ST $acc5,$SIZE_T*6($rp) + $ST $acc6,$SIZE_T*7($rp) + $ST $acc7,$SIZE_T*8($rp) + +.Lsqr8x_done: + $PUSH $zero,$SIZE_T*8($sp) + $PUSH $zero,$SIZE_T*10($sp) + + $POP r14,-$SIZE_T*18($ap) + li r3,1 # signal "done" + $POP r15,-$SIZE_T*17($ap) + $POP r16,-$SIZE_T*16($ap) + $POP r17,-$SIZE_T*15($ap) + $POP r18,-$SIZE_T*14($ap) + $POP r19,-$SIZE_T*13($ap) + $POP r20,-$SIZE_T*12($ap) + $POP r21,-$SIZE_T*11($ap) + $POP r22,-$SIZE_T*10($ap) + $POP r23,-$SIZE_T*9($ap) + $POP r24,-$SIZE_T*8($ap) + $POP r25,-$SIZE_T*7($ap) + $POP r26,-$SIZE_T*6($ap) + $POP r27,-$SIZE_T*5($ap) + $POP r28,-$SIZE_T*4($ap) + $POP r29,-$SIZE_T*3($ap) + $POP r30,-$SIZE_T*2($ap) + $POP r31,-$SIZE_T*1($ap) + mr $sp,$ap + blr + .long 0 + .byte 0,12,4,0x20,0x80,18,6,0 + .long 0 +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +___ +} +$code.=<<___; .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>" ___ diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl index 446d8ba9492b..e37068192f2f 100644 --- a/crypto/bn/asm/ppc.pl +++ b/crypto/bn/asm/ppc.pl @@ -1,5 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. # +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # Implemented as a Perl wrapper as we want to support several different # architectures with single file. We pick up the target based on the # file name we are asked to generate. @@ -32,9 +38,9 @@ #rsa 2048 bits 0.3036s 0.0085s 3.3 117.1 #rsa 4096 bits 2.0040s 0.0299s 0.5 33.4 #dsa 512 bits 0.0087s 0.0106s 114.3 94.5 -#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 +#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 # -# Same bechmark with this assembler code: +# Same benchmark with this assembler code: # #rsa 512 bits 0.0056s 0.0005s 178.6 2049.2 #rsa 1024 bits 0.0283s 0.0015s 35.3 674.1 @@ -68,7 +74,7 @@ #rsa 4096 bits 0.3700s 0.0058s 2.7 171.0 #dsa 512 bits 0.0016s 0.0020s 610.7 507.1 #dsa 1024 bits 0.0047s 0.0058s 212.5 173.2 -# +# # Again, performance increases by at about 75% # # Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code) @@ -95,10 +101,7 @@ #dsa 2048 bits 0.0061s 0.0075s 163.5 132.8 # # Performance increase of ~60% -# -# If you have comments or suggestions to improve code send -# me a note at schari@us.ibm.com -# +# Based on submission from Suresh N. Chari of IBM $flavour = shift; @@ -119,7 +122,7 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzw"; # count leading zeros $SHL= "slw"; # shift left $SHR= "srw"; # unsigned shift right - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate $SHLI= "slwi"; # shift left by immediate $CLRU= "clrlwi"; # clear upper bits $INSR= "insrwi"; # insert right @@ -143,10 +146,10 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzd"; # count leading zeros $SHL= "sld"; # shift left $SHR= "srd"; # unsigned shift right - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate $SHLI= "sldi"; # shift left by immediate $CLRU= "clrldi"; # clear upper bits - $INSR= "insrdi"; # insert right + $INSR= "insrdi"; # insert right $ROTL= "rotldi"; # rotate left by immediate $TR= "td"; # conditional trap } else { die "nonsense $flavour"; } @@ -183,7 +186,7 @@ $data=<<EOF; # below. # 12/05/03 Suresh Chari # (with lots of help from) Andy Polyakov -## +## # 1. Initial version 10/20/02 Suresh Chari # # @@ -196,7 +199,7 @@ $data=<<EOF; # be done in the build process. # # Hand optimized assembly code for the following routines -# +# # bn_sqr_comba4 # bn_sqr_comba8 # bn_mul_comba4 @@ -219,10 +222,10 @@ $data=<<EOF; #-------------------------------------------------------------------------- # # Defines to be used in the assembly code. -# +# #.set r0,0 # we use it as storage for value of 0 #.set SP,1 # preserved -#.set RTOC,2 # preserved +#.set RTOC,2 # preserved #.set r3,3 # 1st argument/return value #.set r4,4 # 2nd argument/volatile register #.set r5,5 # 3rd argument/volatile register @@ -240,7 +243,7 @@ $data=<<EOF; # the first . i.e. for example change ".bn_sqr_comba4" # to "bn_sqr_comba4". This should be automatically done # in the build. - + .globl .bn_sqr_comba4 .globl .bn_sqr_comba8 .globl .bn_mul_comba4 @@ -251,9 +254,9 @@ $data=<<EOF; .globl .bn_sqr_words .globl .bn_mul_words .globl .bn_mul_add_words - + # .text section - + .machine "any" # @@ -272,8 +275,8 @@ $data=<<EOF; # r3 contains r # r4 contains a # -# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: -# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# # r5,r6 are the two BN_ULONGs being multiplied. # r7,r8 are the results of the 32x32 giving 64 bit multiply. # r9,r10, r11 are the equivalents of c1,c2, c3. @@ -282,10 +285,10 @@ $data=<<EOF; # xor r0,r0,r0 # set r0 = 0. Used in the addze # instructions below - + #sqr_add_c(a,0,c1,c2,c3) - $LD r5,`0*$BNSZ`(r4) - $UMULL r9,r5,r5 + $LD r5,`0*$BNSZ`(r4) + $UMULL r9,r5,r5 $UMULH r10,r5,r5 #in first iteration. No need #to add since c1=c2=c3=0. # Note c3(r11) is NOT set to 0 @@ -293,20 +296,20 @@ $data=<<EOF; $ST r9,`0*$BNSZ`(r3) # r[0]=c1; # sqr_add_c2(a,1,0,c2,c3,c1); - $LD r6,`1*$BNSZ`(r4) + $LD r6,`1*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8) adde r8,r8,r8 addze r9,r0 # catch carry if any. - # r9= r0(=0) and carry - + # r9= r0(=0) and carry + addc r10,r7,r10 # now add to temp result. - addze r11,r8 # r8 added to r11 which is 0 + addze r11,r8 # r8 added to r11 which is 0 addze r9,r9 - - $ST r10,`1*$BNSZ`(r3) #r[1]=c2; + + $ST r10,`1*$BNSZ`(r3) #r[1]=c2; #sqr_add_c(a,1,c3,c1,c2) $UMULL r7,r6,r6 $UMULH r8,r6,r6 @@ -317,23 +320,23 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 adde r8,r8,r8 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - $ST r11,`2*$BNSZ`(r3) #r[2]=c3 + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 #sqr_add_c2(a,3,0,c1,c2,c3); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r11,r0 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -342,7 +345,7 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 adde r8,r8,r8 addze r11,r11 @@ -357,31 +360,31 @@ $data=<<EOF; adde r11,r8,r11 addze r9,r0 #sqr_add_c2(a,3,1,c2,c3,c1); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 $ST r10,`4*$BNSZ`(r3) #r[4]=c2 #sqr_add_c2(a,3,2,c3,c1,c2); - $LD r5,`2*$BNSZ`(r4) + $LD r5,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r10,r0 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 $ST r11,`5*$BNSZ`(r3) #r[5] = c3 #sqr_add_c(a,3,c1,c2,c3); - $UMULL r7,r6,r6 + $UMULL r7,r6,r6 $UMULH r8,r6,r6 addc r9,r7,r9 adde r10,r8,r10 @@ -400,7 +403,7 @@ $data=<<EOF; # for the gcc compiler. This should be automatically # done in the build # - + .align 4 .bn_sqr_comba8: # @@ -412,15 +415,15 @@ $data=<<EOF; # r3 contains r # r4 contains a # -# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: -# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# # r5,r6 are the two BN_ULONGs being multiplied. # r7,r8 are the results of the 32x32 giving 64 bit multiply. # r9,r10, r11 are the equivalents of c1,c2, c3. # # Possible optimization of loading all 8 longs of a into registers -# doesnt provide any speedup -# +# doesn't provide any speedup +# xor r0,r0,r0 #set r0 = 0.Used in addze #instructions below. @@ -433,18 +436,18 @@ $data=<<EOF; #sqr_add_c2(a,1,0,c2,c3,c1); $LD r6,`1*$BNSZ`(r4) $UMULL r7,r5,r6 - $UMULH r8,r5,r6 - + $UMULH r8,r5,r6 + addc r10,r7,r10 #add the two register number adde r11,r8,r0 # (r8,r7) to the three register addze r9,r0 # number (r9,r11,r10).NOTE:r0=0 - + addc r10,r7,r10 #add the two register number adde r11,r8,r11 # (r8,r7) to the three register addze r9,r9 # number (r9,r11,r10). - + $ST r10,`1*$BNSZ`(r3) # r[1]=c2 - + #sqr_add_c(a,1,c3,c1,c2); $UMULL r7,r6,r6 $UMULH r8,r6,r6 @@ -455,25 +458,25 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 #sqr_add_c2(a,3,0,c1,c2,c3); $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0]. $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r0 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -482,20 +485,20 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + $ST r9,`3*$BNSZ`(r3) #r[3]=c1; #sqr_add_c(a,2,c2,c3,c1); $UMULL r7,r6,r6 $UMULH r8,r6,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r0 @@ -503,11 +506,11 @@ $data=<<EOF; $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -516,11 +519,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -529,11 +532,11 @@ $data=<<EOF; $LD r6,`5*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r0 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -542,11 +545,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -555,11 +558,11 @@ $data=<<EOF; $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -574,11 +577,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -587,11 +590,11 @@ $data=<<EOF; $LD r6,`5*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -611,7 +614,7 @@ $data=<<EOF; $LD r6,`7*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r0 @@ -623,7 +626,7 @@ $data=<<EOF; $LD r6,`6*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -646,7 +649,7 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -678,7 +681,7 @@ $data=<<EOF; addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -698,7 +701,7 @@ $data=<<EOF; $LD r5,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r0 @@ -795,7 +798,7 @@ $data=<<EOF; adde r10,r8,r10 addze r11,r11 $ST r9,`12*$BNSZ`(r3) #r[12]=c1; - + #sqr_add_c2(a,7,6,c2,c3,c1) $LD r5,`6*$BNSZ`(r4) $UMULL r7,r5,r6 @@ -844,21 +847,21 @@ $data=<<EOF; # xor r0,r0,r0 #r0=0. Used in addze below. #mul_add_c(a[0],b[0],c1,c2,c3); - $LD r6,`0*$BNSZ`(r4) - $LD r7,`0*$BNSZ`(r5) - $UMULL r10,r6,r7 - $UMULH r11,r6,r7 + $LD r6,`0*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r10,r6,r7 + $UMULH r11,r6,r7 $ST r10,`0*$BNSZ`(r3) #r[0]=c1 #mul_add_c(a[0],b[1],c2,c3,c1); - $LD r7,`1*$BNSZ`(r5) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 adde r12,r9,r0 addze r10,r0 #mul_add_c(a[1],b[0],c2,c3,c1); - $LD r6, `1*$BNSZ`(r4) - $LD r7, `0*$BNSZ`(r5) + $LD r6, `1*$BNSZ`(r4) + $LD r7, `0*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 @@ -866,23 +869,23 @@ $data=<<EOF; addze r10,r10 $ST r11,`1*$BNSZ`(r3) #r[1]=c2 #mul_add_c(a[2],b[0],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 adde r10,r9,r10 addze r11,r0 #mul_add_c(a[1],b[1],c3,c1,c2); - $LD r6,`1*$BNSZ`(r4) - $LD r7,`1*$BNSZ`(r5) + $LD r6,`1*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 adde r10,r9,r10 addze r11,r11 #mul_add_c(a[0],b[2],c3,c1,c2); - $LD r6,`0*$BNSZ`(r4) - $LD r7,`2*$BNSZ`(r5) + $LD r6,`0*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 @@ -890,7 +893,7 @@ $data=<<EOF; addze r11,r11 $ST r12,`2*$BNSZ`(r3) #r[2]=c3 #mul_add_c(a[0],b[3],c1,c2,c3); - $LD r7,`3*$BNSZ`(r5) + $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r10,r8,r10 @@ -922,7 +925,7 @@ $data=<<EOF; addze r12,r12 $ST r10,`3*$BNSZ`(r3) #r[3]=c1 #mul_add_c(a[3],b[1],c2,c3,c1); - $LD r7,`1*$BNSZ`(r5) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 @@ -946,7 +949,7 @@ $data=<<EOF; addze r10,r10 $ST r11,`4*$BNSZ`(r3) #r[4]=c2 #mul_add_c(a[2],b[3],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 @@ -962,7 +965,7 @@ $data=<<EOF; addze r11,r11 $ST r12,`5*$BNSZ`(r3) #r[5]=c3 #mul_add_c(a[3],b[3],c1,c2,c3); - $LD r7,`3*$BNSZ`(r5) + $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r10,r8,r10 @@ -982,7 +985,7 @@ $data=<<EOF; # for the gcc compiler. This should be automatically # done in the build # - + .align 4 .bn_mul_comba8: # @@ -997,7 +1000,7 @@ $data=<<EOF; # r10, r11, r12 are the equivalents of c1, c2, and c3. # xor r0,r0,r0 #r0=0. Used in addze below. - + #mul_add_c(a[0],b[0],c1,c2,c3); $LD r6,`0*$BNSZ`(r4) #a[0] $LD r7,`0*$BNSZ`(r5) #b[0] @@ -1009,7 +1012,7 @@ $data=<<EOF; $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r11,r8 - addze r12,r9 # since we didnt set r12 to zero before. + addze r12,r9 # since we didn't set r12 to zero before. addze r10,r0 #mul_add_c(a[1],b[0],c2,c3,c1); $LD r6,`1*$BNSZ`(r4) @@ -1059,7 +1062,7 @@ $data=<<EOF; addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 - + #mul_add_c(a[2],b[1],c1,c2,c3); $LD r6,`2*$BNSZ`(r4) $LD r7,`1*$BNSZ`(r5) @@ -1125,7 +1128,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r0 #mul_add_c(a[1],b[4],c3,c1,c2); - $LD r6,`1*$BNSZ`(r4) + $LD r6,`1*$BNSZ`(r4) $LD r7,`4*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1133,7 +1136,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[2],b[3],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1141,7 +1144,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[3],b[2],c3,c1,c2); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $LD r7,`2*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1149,7 +1152,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[4],b[1],c3,c1,c2); - $LD r6,`4*$BNSZ`(r4) + $LD r6,`4*$BNSZ`(r4) $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1157,7 +1160,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[5],b[0],c3,c1,c2); - $LD r6,`5*$BNSZ`(r4) + $LD r6,`5*$BNSZ`(r4) $LD r7,`0*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1549,7 +1552,7 @@ $data=<<EOF; addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ mtctr r6 -Lppcasm_sub_mainloop: +Lppcasm_sub_mainloop: $LDU r7,$BNSZ(r4) $LDU r8,$BNSZ(r5) subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8) @@ -1557,7 +1560,7 @@ Lppcasm_sub_mainloop: # is r7-r8 -1 as we need. $STU r6,$BNSZ(r3) bdnz Lppcasm_sub_mainloop -Lppcasm_sub_adios: +Lppcasm_sub_adios: subfze r3,r0 # if carry bit is set then r3 = 0 else -1 andi. r3,r3,1 # keep only last bit. blr @@ -1598,13 +1601,13 @@ Lppcasm_sub_adios: addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ mtctr r6 -Lppcasm_add_mainloop: +Lppcasm_add_mainloop: $LDU r7,$BNSZ(r4) $LDU r8,$BNSZ(r5) adde r8,r7,r8 $STU r8,$BNSZ(r3) bdnz Lppcasm_add_mainloop -Lppcasm_add_adios: +Lppcasm_add_adios: addze r3,r0 #return carry bit. blr .long 0 @@ -1627,11 +1630,11 @@ Lppcasm_add_adios: # the PPC instruction to count leading zeros instead # of call to num_bits_word. Since this was compiled # only at level -O2 we can possibly squeeze it more? -# +# # r3 = h # r4 = l # r5 = d - + $UCMPI 0,r5,0 # compare r5 and 0 bne Lppcasm_div1 # proceed if d!=0 li r3,-1 # d=0 return -1 @@ -1647,7 +1650,7 @@ Lppcasm_div1: Lppcasm_div2: $UCMP 0,r3,r5 #h>=d? blt Lppcasm_div3 #goto Lppcasm_div3 if not - subf r3,r5,r3 #h-=d ; + subf r3,r5,r3 #h-=d ; Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i cmpi 0,0,r7,0 # is (i == 0)? beq Lppcasm_div4 @@ -1662,7 +1665,7 @@ Lppcasm_div4: # as it saves registers. li r6,2 #r6=2 mtctr r6 #counter will be in count. -Lppcasm_divouterloop: +Lppcasm_divouterloop: $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4) $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 # compute here for innerloop. @@ -1670,7 +1673,7 @@ Lppcasm_divouterloop: bne Lppcasm_div5 # goto Lppcasm_div5 if not li r8,-1 - $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l + $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l b Lppcasm_div6 Lppcasm_div5: $UDIV r8,r3,r9 #q = h/dh @@ -1678,7 +1681,7 @@ Lppcasm_div6: $UMULL r12,r9,r8 #th = q*dh $CLRU r10,r5,`$BITS/2` #r10=dl $UMULL r6,r8,r10 #tl = q*dl - + Lppcasm_divinnerloop: subf r10,r12,r3 #t = h -th $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of... @@ -1755,7 +1758,7 @@ Lppcasm_div9: addi r4,r4,-$BNSZ addi r3,r3,-$BNSZ mtctr r5 -Lppcasm_sqr_mainloop: +Lppcasm_sqr_mainloop: #sqr(r[0],r[1],a[0]); $LDU r6,$BNSZ(r4) $UMULL r7,r6,r6 @@ -1763,7 +1766,7 @@ Lppcasm_sqr_mainloop: $STU r7,$BNSZ(r3) $STU r8,$BNSZ(r3) bdnz Lppcasm_sqr_mainloop -Lppcasm_sqr_adios: +Lppcasm_sqr_adios: blr .long 0 .byte 0,12,0x14,0,0,0,3,0 @@ -1777,7 +1780,7 @@ Lppcasm_sqr_adios: # done in the build # -.align 4 +.align 4 .bn_mul_words: # # BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) @@ -1791,7 +1794,7 @@ Lppcasm_sqr_adios: rlwinm. r7,r5,30,2,31 # num >> 2 beq Lppcasm_mw_REM mtctr r7 -Lppcasm_mw_LOOP: +Lppcasm_mw_LOOP: #mul(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1803,7 +1806,7 @@ Lppcasm_mw_LOOP: #using adde. $ST r9,`0*$BNSZ`(r3) #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r11,r6,r8 $UMULH r12,r6,r8 adde r11,r11,r10 @@ -1824,7 +1827,7 @@ Lppcasm_mw_LOOP: addze r12,r12 #this spin we collect carry into #r12 $ST r11,`3*$BNSZ`(r3) - + addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_mw_LOOP @@ -1840,25 +1843,25 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`0*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 addc r9,r9,r12 addze r10,r10 $ST r9,`1*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1867,14 +1870,14 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`2*$BNSZ`(r3) addi r12,r10,0 - -Lppcasm_mw_OVER: + +Lppcasm_mw_OVER: addi r3,r12,0 blr .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 -.size bn_mul_words,.-bn_mul_words +.size .bn_mul_words,.-.bn_mul_words # # NOTE: The following label name should be changed to @@ -1896,11 +1899,11 @@ Lppcasm_mw_OVER: # empirical evidence suggests that unrolled version performs best!! # xor r0,r0,r0 #r0 = 0 - xor r12,r12,r12 #r12 = 0 . used for carry + xor r12,r12,r12 #r12 = 0 . used for carry rlwinm. r7,r5,30,2,31 # num >> 2 beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover mtctr r7 -Lppcasm_maw_mainloop: +Lppcasm_maw_mainloop: #mul_add(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $LD r11,`0*$BNSZ`(r3) @@ -1916,9 +1919,9 @@ Lppcasm_maw_mainloop: #by multiply and will be collected #in the next spin $ST r9,`0*$BNSZ`(r3) - + #mul_add(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $LD r9,`1*$BNSZ`(r3) $UMULL r11,r6,r8 $UMULH r12,r6,r8 @@ -1927,7 +1930,7 @@ Lppcasm_maw_mainloop: addc r11,r11,r9 #addze r12,r12 $ST r11,`1*$BNSZ`(r3) - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1938,7 +1941,7 @@ Lppcasm_maw_mainloop: addc r9,r9,r11 #addze r10,r10 $ST r9,`2*$BNSZ`(r3) - + #mul_add(rp[3],ap[3],w,c1); $LD r8,`3*$BNSZ`(r4) $UMULL r11,r6,r8 @@ -1952,7 +1955,7 @@ Lppcasm_maw_mainloop: addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_maw_mainloop - + Lppcasm_maw_leftover: andi. r5,r5,0x3 beq Lppcasm_maw_adios @@ -1969,10 +1972,10 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[1],ap[1],w,c1); - $LDU r8,$BNSZ(r4) + $LDU r8,$BNSZ(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 $LDU r11,$BNSZ(r3) @@ -1981,7 +1984,7 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[2],ap[2],w,c1); $LDU r8,$BNSZ(r4) @@ -1993,8 +1996,8 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - -Lppcasm_maw_adios: + +Lppcasm_maw_adios: addi r3,r12,0 blr .long 0 diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl index 595fc6d31f60..c41b620bc23e 100755 --- a/crypto/bn/asm/ppc64-mont.pl +++ b/crypto/bn/asm/ppc64-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -28,7 +35,7 @@ # key lengths. As it's obviously inappropriate as "best all-round" # alternative, it has to be complemented with run-time CPU family # detection. Oh! It should also be noted that unlike other PowerPC -# implementation IALU ppc-mont.pl module performs *suboptimaly* on +# implementation IALU ppc-mont.pl module performs *suboptimally* on # >=1024-bit key lengths on Power 6. It should also be noted that # *everything* said so far applies to 64-bit builds! As far as 32-bit # application executed on 64-bit CPU goes, this module is likely to @@ -1346,7 +1353,7 @@ $code.=<<___; std $t3,-16($tp) ; tp[j-1] std $t5,-8($tp) ; tp[j] - add $carry,$carry,$ovf ; comsume upmost overflow + add $carry,$carry,$ovf ; consume upmost overflow add $t6,$t6,$carry ; can not overflow srdi $carry,$t6,16 add $t7,$t7,$carry @@ -1494,16 +1501,14 @@ Lsub: ldx $t0,$tp,$i li $i,0 subfe $ovf,$i,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - addi $t7,$ap,8 mtctr $j .align 4 -Lcopy: ; copy or in-place refresh - ldx $t0,$ap,$i - ldx $t1,$t7,$i +Lcopy: ; conditional copy + ldx $t0,$tp,$i + ldx $t1,$t4,$i + ldx $t2,$rp,$i + ldx $t3,$t6,$i std $i,8($nap_d) ; zap nap_d std $i,16($nap_d) std $i,24($nap_d) @@ -1512,6 +1517,12 @@ Lcopy: ; copy or in-place refresh std $i,48($nap_d) std $i,56($nap_d) stdu $i,64($nap_d) + and $t0,$t0,$ovf + and $t1,$t1,$ovf + andc $t2,$t2,$ovf + andc $t3,$t3,$ovf + or $t0,$t0,$t2 + or $t1,$t1,$t3 stdx $t0,$rp,$i stdx $t1,$t6,$i stdx $i,$tp,$i ; zap tp at once @@ -1554,20 +1565,21 @@ Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order li $i,0 subfe $ovf,$i,$ovf ; handle upmost overflow bit - addi $tp,$sp,`$FRAME+$TRANSFER+4` + addi $ap,$sp,`$FRAME+$TRANSFER+4` subf $rp,$num,$rp ; rewind rp - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp addi $tp,$sp,`$FRAME+$TRANSFER` mtctr $j .align 4 -Lcopy: ; copy or in-place refresh +Lcopy: ; conditional copy lwz $t0,4($ap) lwz $t1,8($ap) lwz $t2,12($ap) lwzu $t3,16($ap) + lwz $t4,4($rp) + lwz $t5,8($rp) + lwz $t6,12($rp) + lwz $t7,16($rp) std $i,8($nap_d) ; zap nap_d std $i,16($nap_d) std $i,24($nap_d) @@ -1576,6 +1588,18 @@ Lcopy: ; copy or in-place refresh std $i,48($nap_d) std $i,56($nap_d) stdu $i,64($nap_d) + and $t0,$t0,$ovf + and $t1,$t1,$ovf + and $t2,$t2,$ovf + and $t3,$t3,$ovf + andc $t4,$t4,$ovf + andc $t5,$t5,$ovf + andc $t6,$t6,$ovf + andc $t7,$t7,$ovf + or $t0,$t0,$t4 + or $t1,$t1,$t5 + or $t2,$t2,$t6 + or $t3,$t3,$t7 stw $t0,4($rp) stw $t1,8($rp) stw $t2,12($rp) diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 2b3f8b0e21ec..f1292cc75cfb 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -1,61 +1,30 @@ -#!/usr/bin/env perl - -############################################################################## -# # -# Copyright (c) 2012, Intel Corporation # -# # -# All rights reserved. # -# # -# Redistribution and use in source and binary forms, with or without # -# modification, are permitted provided that the following conditions are # -# met: # -# # -# * Redistributions of source code must retain the above copyright # -# notice, this list of conditions and the following disclaimer. # -# # -# * Redistributions in binary form must reproduce the above copyright # -# notice, this list of conditions and the following disclaimer in the # -# documentation and/or other materials provided with the # -# distribution. # -# # -# * Neither the name of the Intel Corporation nor the names of its # -# contributors may be used to endorse or promote products derived from # -# this software without specific prior written permission. # -# # -# # -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -# # -############################################################################## -# Developers and authors: # -# Shay Gueron (1, 2), and Vlad Krasnov (1) # -# (1) Intel Corporation, Israel Development Center, Haifa, Israel # -# (2) University of Haifa, Israel # -############################################################################## -# Reference: # -# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular # -# Exponentiation, Using Advanced Vector Instructions Architectures", # -# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, # -# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 # -# [2] S. Gueron: "Efficient Software Implementations of Modular # -# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). # -# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE # -# Proceedings of 9th International Conference on Information Technology: # -# New Generations (ITNG 2012), pp.821-823 (2012) # -# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # -# resistant 1024-bit modular exponentiation, for optimizing RSA2048 # -# on AVX2 capable x86_64 platforms", # -# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest# -############################################################################## +#! /usr/bin/env perl +# Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright (c) 2012, Intel Corporation. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html +# +# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) +# (1) Intel Corporation, Israel Development Center, Haifa, Israel +# (2) University of Haifa, Israel +# +# References: +# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular +# Exponentiation, Using Advanced Vector Instructions Architectures", +# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, +# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 +# [2] S. Gueron: "Efficient Software Implementations of Modular +# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). +# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE +# Proceedings of 9th International Conference on Information Technology: +# New Generations (ITNG 2012), pp.821-823 (2012) +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis +# resistant 1024-bit modular exponentiation, for optimizing RSA2048 +# on AVX2 capable x86_64 platforms", +# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest # # +13% improvement over original submission by <appro@openssl.org> # @@ -97,13 +66,13 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $avx = ($ver>=3.0) + ($ver>=3.01); $addx = ($ver>=3.03); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT = *OUT; if ($avx>1) {{{ @@ -161,13 +130,21 @@ $code.=<<___; .type rsaz_1024_sqr_avx2,\@function,5 .align 64 rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 vzeroupper ___ $code.=<<___ if ($win64); @@ -186,6 +163,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp mov %rdx, $np # reassigned argument sub \$$FrameSize, %rsp mov $np, $tmp @@ -375,7 +353,7 @@ $code.=<<___; vpaddq $TEMP1, $ACC1, $ACC1 vpmuludq 32*7-128($aap), $B2, $ACC2 vpbroadcastq 32*5-128($tpa), $B2 - vpaddq 32*11-448($tp1), $ACC2, $ACC2 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 vmovdqu $ACC6, 32*6-192($tp0) vmovdqu $ACC7, 32*7-192($tp0) @@ -434,7 +412,7 @@ $code.=<<___; vmovdqu $ACC7, 32*16-448($tp1) lea 8($tp1), $tp1 - dec $i + dec $i jnz .LOOP_SQR_1024 ___ $ZERO = $ACC9; @@ -779,7 +757,7 @@ $code.=<<___; vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 vpaddq $TEMP3, $ACC7, $ACC7 vpaddq $TEMP4, $ACC8, $ACC8 - + vpsrlq \$29, $ACC4, $TEMP1 vpand $AND_MASK, $ACC4, $ACC4 vpsrlq \$29, $ACC5, $TEMP2 @@ -818,8 +796,10 @@ $code.=<<___; vzeroall mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); +.Lsqr_1024_in_tail: movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 @@ -833,14 +813,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lsqr_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 ___ } @@ -893,13 +881,21 @@ $code.=<<___; .type rsaz_1024_mul_avx2,\@function,5 .align 64 rsaz_1024_mul_avx2: +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); vzeroupper @@ -918,6 +914,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp vzeroall mov %rdx, $bp # reassigned argument sub \$64,%rsp @@ -1443,15 +1440,17 @@ $code.=<<___; vpaddq $TEMP4, $ACC8, $ACC8 vmovdqu $ACC4, 128-128($rp) - vmovdqu $ACC5, 160-128($rp) + vmovdqu $ACC5, 160-128($rp) vmovdqu $ACC6, 192-128($rp) vmovdqu $ACC7, 224-128($rp) vmovdqu $ACC8, 256-128($rp) vzeroupper mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); +.Lmul_1024_in_tail: movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 @@ -1465,14 +1464,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lmul_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 ___ } @@ -1591,8 +1598,10 @@ rsaz_1024_scatter5_avx2: .type rsaz_1024_gather5_avx2,\@abi-omnipotent .align 32 rsaz_1024_gather5_avx2: +.cfi_startproc vzeroupper mov %rsp,%r11 +.cfi_def_cfa_register %r11 ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax @@ -1730,11 +1739,13 @@ $code.=<<___ if ($win64); movaps -0x38(%r11),%xmm13 movaps -0x28(%r11),%xmm14 movaps -0x18(%r11),%xmm15 -.LSEH_end_rsaz_1024_gather5: ___ $code.=<<___; lea (%r11),%rsp +.cfi_def_cfa_register %rsp ret +.cfi_endproc +.LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ } @@ -1807,14 +1818,17 @@ rsaz_se_handler: cmp %r10,%rbx # context->Rip<prologue label jb .Lcommon_seh_tail - mov 152($context),%rax # pull context->Rsp - mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail - mov 160($context),%rax # pull context->Rbp + mov 160($context),%rbp # pull context->Rbp + + mov 8(%r11),%r10d # HandlerData[2] + lea (%rsi,%r10),%r10 # "in tail" label + cmp %r10,%rbx # context->Rip>="in tail" label + cmovc %rbp,%rax mov -48(%rax),%r15 mov -40(%rax),%r14 @@ -1892,11 +1906,13 @@ rsaz_se_handler: .LSEH_info_rsaz_1024_sqr_avx2: .byte 9,0,0,0 .rva rsaz_se_handler - .rva .Lsqr_1024_body,.Lsqr_1024_epilogue + .rva .Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail + .long 0 .LSEH_info_rsaz_1024_mul_avx2: .byte 9,0,0,0 .rva rsaz_se_handler - .rva .Lmul_1024_body,.Lmul_1024_epilogue + .rva .Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail + .long 0 .LSEH_info_rsaz_1024_gather5: .byte 0x01,0x36,0x17,0x0b .byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15 diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 87ce2c34d90c..b1797b649f00 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -1,61 +1,29 @@ -#!/usr/bin/env perl - -############################################################################## -# # -# Copyright (c) 2012, Intel Corporation # -# # -# All rights reserved. # -# # -# Redistribution and use in source and binary forms, with or without # -# modification, are permitted provided that the following conditions are # -# met: # -# # -# * Redistributions of source code must retain the above copyright # -# notice, this list of conditions and the following disclaimer. # -# # -# * Redistributions in binary form must reproduce the above copyright # -# notice, this list of conditions and the following disclaimer in the # -# documentation and/or other materials provided with the # -# distribution. # -# # -# * Neither the name of the Intel Corporation nor the names of its # -# contributors may be used to endorse or promote products derived from # -# this software without specific prior written permission. # -# # -# # -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -# # -############################################################################## -# Developers and authors: # -# Shay Gueron (1, 2), and Vlad Krasnov (1) # -# (1) Intel Architecture Group, Microprocessor and Chipset Development, # -# Israel Development Center, Haifa, Israel # -# (2) University of Haifa # -############################################################################## -# Reference: # -# [1] S. Gueron, "Efficient Software Implementations of Modular # -# Exponentiation", http://eprint.iacr.org/2011/239 # -# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". # -# IEEE Proceedings of 9th International Conference on Information # -# Technology: New Generations (ITNG 2012), 821-823 (2012). # -# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation# -# Journal of Cryptographic Engineering 2:31-43 (2012). # -# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # -# resistant 512-bit and 1024-bit modular exponentiation for optimizing # -# RSA1024 and RSA2048 on x86_64 platforms", # -# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest# -############################################################################## - +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright (c) 2012, Intel Corporation. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html +# +# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) +# (1) Intel Corporation, Israel Development Center, Haifa, Israel +# (2) University of Haifa, Israel +# +# References: +# [1] S. Gueron, "Efficient Software Implementations of Modular +# Exponentiation", http://eprint.iacr.org/2011/239 +# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". +# IEEE Proceedings of 9th International Conference on Information +# Technology: New Generations (ITNG 2012), 821-823 (2012). +# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation +# Journal of Cryptographic Engineering 2:31-43 (2012). +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis +# resistant 512-bit and 1024-bit modular exponentiation for optimizing +# RSA1024 and RSA2048 on x86_64 platforms", +# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest +# # While original submission covers 512- and 1024-bit exponentiation, # this module is limited to 512-bit version only (and as such # accelerates RSA1024 sign). This is because improvement for longer @@ -95,7 +63,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` @@ -131,14 +99,22 @@ $code.=<<___; .type rsaz_512_sqr,\@function,5 .align 32 rsaz_512_sqr: # 25-29% faster than rsaz_512_mul +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lsqr_body: movq $mod, %rbp # common argument movq ($inp), %rdx @@ -275,9 +251,9 @@ $code.=<<___; movq %r9, 16(%rsp) movq %r10, 24(%rsp) shrq \$63, %rbx - + #third iteration - movq 16($inp), %r9 + movq 16($inp), %r9 movq 24($inp), %rax mulq %r9 addq %rax, %r12 @@ -525,7 +501,7 @@ $code.=<<___; movl $times,128+8(%rsp) movq $out, %xmm0 # off-load movq %rbp, %xmm1 # off-load -#first iteration +#first iteration mulx %rax, %r8, %r9 mulx 16($inp), %rcx, %r10 @@ -561,7 +537,7 @@ $code.=<<___; mov %rax, (%rsp) mov %r8, 8(%rsp) -#second iteration +#second iteration mulx 16($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -600,8 +576,8 @@ $code.=<<___; mov %r9, 16(%rsp) .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp) - -#third iteration + +#third iteration .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9 adox $out, %r12 adcx %r9, %r13 @@ -636,8 +612,8 @@ $code.=<<___; mov %r11, 32(%rsp) .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp) - -#fourth iteration + +#fourth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx adox %rax, %r14 adcx %rbx, %r15 @@ -669,8 +645,8 @@ $code.=<<___; mov %r13, 48(%rsp) mov %r14, 56(%rsp) - -#fifth iteration + +#fifth iteration .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11 adox $out, %r8 adcx %r11, %r9 @@ -697,8 +673,8 @@ $code.=<<___; mov %r15, 64(%rsp) mov %r8, 72(%rsp) - -#sixth iteration + +#sixth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -793,15 +769,24 @@ ___ $code.=<<___; leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lsqr_epilogue: ret +.cfi_endproc .size rsaz_512_sqr,.-rsaz_512_sqr ___ } @@ -812,14 +797,22 @@ $code.=<<___; .type rsaz_512_mul,\@function,5 .align 32 rsaz_512_mul: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_body: movq $out, %xmm0 # off-load arguments movq $mod, %xmm1 @@ -889,15 +882,24 @@ $code.=<<___; call __rsaz_512_subtract leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size rsaz_512_mul,.-rsaz_512_mul ___ } @@ -908,14 +910,22 @@ $code.=<<___; .type rsaz_512_mul_gather4,\@function,6 .align 32 rsaz_512_mul_gather4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$`128+24+($win64?0xb0:0)`, %rsp +.cfi_adjust_cfa_offset `128+24+($win64?0xb0:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,0xa0(%rsp) @@ -1041,7 +1051,7 @@ $code.=<<___; movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1143,7 +1153,7 @@ $code.=<<___; movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi @@ -1205,7 +1215,7 @@ $code.=<<___ if ($addx); mulx 48($ap), %rbx, %r14 adcx %rax, %r12 - + mulx 56($ap), %rax, %r15 adcx %rbx, %r13 adcx %rax, %r14 @@ -1341,15 +1351,24 @@ $code.=<<___ if ($win64); lea 0xb0(%rax),%rax ___ $code.=<<___; +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_gather4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 ___ } @@ -1360,15 +1379,23 @@ $code.=<<___; .type rsaz_512_mul_scatter4,\@function,6 .align 32 rsaz_512_mul_scatter4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 mov $pwr, $pwr subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_scatter4_body: leaq ($tbl,$pwr,8), $tbl movq $out, %xmm0 # off-load arguments @@ -1404,7 +1431,7 @@ $code.=<<___; ___ $code.=<<___ if ($addx); jmp .Lmul_scatter_tail - + .align 32 .Lmulx_scatter: movq ($out), %rdx # pass b[0] @@ -1451,15 +1478,24 @@ $code.=<<___; movq %r15, 128*7($inp) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_scatter4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 ___ } @@ -1470,14 +1506,22 @@ $code.=<<___; .type rsaz_512_mul_by_one,\@function,4 .align 32 rsaz_512_mul_by_one: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_by_one_body: ___ $code.=<<___ if ($addx); @@ -1532,15 +1576,24 @@ $code.=<<___; movq %r15, 56($out) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_by_one_epilogue: ret +.cfi_endproc .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one ___ } @@ -1767,7 +1820,7 @@ ___ { # __rsaz_512_mul # # input: %rsi - ap, %rbp - bp - # ouput: + # output: # clobbers: everything my ($ap,$bp) = ("%rsi","%rbp"); $code.=<<___; @@ -1817,7 +1870,7 @@ __rsaz_512_mul: movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1894,7 +1947,7 @@ __rsaz_512_mul: movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi @@ -1919,7 +1972,7 @@ if ($addx) { # __rsaz_512_mulx # # input: %rsi - ap, %rbp - bp - # ouput: + # output: # clobbers: everything my ($ap,$bp,$zero) = ("%rsi","%rbp","%rdi"); $code.=<<___; diff --git a/crypto/bn/asm/s390x-gf2m.pl b/crypto/bn/asm/s390x-gf2m.pl index 9d18d40e7784..06181bf9b95f 100755 --- a/crypto/bn/asm/s390x-gf2m.pl +++ b/crypto/bn/asm/s390x-gf2m.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -13,7 +20,7 @@ # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for # the time being... gcc 4.3 appeared to generate poor code, therefore # the effort. And indeed, the module delivers 55%-90%(*) improvement -# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit +# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit # key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196. # This is for 64-bit build. In 32-bit "highgprs" case improvement is # even higher, for example on z990 it was measured 80%-150%. ECDSA @@ -35,7 +42,7 @@ if ($flavour =~ /3[12]/) { $g="g"; } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $stdframe=16*$SIZE_T+4*8; @@ -191,7 +198,7 @@ $code.=<<___; xgr $hi,@r[1] xgr $lo,@r[0] xgr $hi,@r[2] - xgr $lo,@r[3] + xgr $lo,@r[3] xgr $hi,@r[3] xgr $lo,$hi stg $hi,16($rp) diff --git a/crypto/bn/asm/s390x-mont.pl b/crypto/bn/asm/s390x-mont.pl index 9fd64e81eef3..c2fc5adffe0d 100755 --- a/crypto/bn/asm/s390x-mont.pl +++ b/crypto/bn/asm/s390x-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -54,7 +61,7 @@ if ($flavour =~ /3[12]/) { $g="g"; } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $stdframe=16*$SIZE_T+4*8; @@ -245,16 +252,16 @@ $code.=<<___; brct $count,.Lsub lghi $ahi,0 slbgr $AHI,$ahi # handle upmost carry - - ngr $ap,$AHI - lghi $np,-1 - xgr $np,$AHI - ngr $np,$rp - ogr $ap,$np # ap=borrow?tp:rp + lghi $NHI,-1 + xgr $NHI,$AHI la $j,0(%r0) lgr $count,$num -.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh +.Lcopy: lg $ahi,$stdframe($j,$sp) # conditional copy + lg $alo,0($j,$rp) + ngr $ahi,$AHI + ngr $alo,$NHI + ogr $alo,$ahi _dswap $alo stg $j,$stdframe($j,$sp) # zap tp stg $alo,0($j,$rp) diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S index f5eebe413a28..292a7a9998bd 100755 --- a/crypto/bn/asm/s390x.S +++ b/crypto/bn/asm/s390x.S @@ -1,11 +1,11 @@ .ident "s390x.S, version 1.1" // ==================================================================== -// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -// project. +// Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. // -// Rights for redistribution and usage in source and binary forms are -// granted according to the OpenSSL license. Warranty of any kind is -// disclaimed. +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html // ==================================================================== .text diff --git a/crypto/bn/asm/sparct4-mont.pl b/crypto/bn/asm/sparct4-mont.pl index 71b45002a42f..fcae9cfc5b44 100755 --- a/crypto/bn/asm/sparct4-mont.pl +++ b/crypto/bn/asm/sparct4-mont.pl @@ -1,9 +1,16 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2012-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov -# <appro@openssl.org>. The module is licensed under 2-clause BSD -# license. November 2012. All rights reserved. +# Written by David S. Miller and Andy Polyakov +# The module is licensed under 2-clause BSD license. +# November 2012. All rights reserved. # ==================================================================== ###################################################################### @@ -76,6 +83,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "sparcv9_modes.pl"; +$output = pop; +open STDOUT,">$output"; + $code.=<<___; #include "sparc_arch.h" @@ -878,19 +888,17 @@ $code.=<<___; sub $tp, $num, $tp sub $rp, $num, $rp - subc $ovf, %g0, $ovf ! handle upmost overflow bit - and $tp, $ovf, $ap - andn $rp, $ovf, $np - or $np, $ap, $ap ! ap=borrow?tp:rp + subccc $ovf, %g0, $ovf ! handle upmost overflow bit ba .Lcopy sub $num, 8, $cnt .align 16 -.Lcopy: ! copy or in-place refresh - ldx [$ap+0], $t2 - add $ap, 8, $ap +.Lcopy: ! conditional copy + ldx [$tp], $tj + ldx [$rp+0], $t2 stx %g0, [$tp] ! zap add $tp, 8, $tp + movcs %icc, $tj, $t2 stx $t2, [$rp+0] add $rp, 8, $rp brnz $cnt, .Lcopy @@ -1126,19 +1134,17 @@ $code.=<<___; sub $tp, $num, $tp sub $rp, $num, $rp - subc $ovf, %g0, $ovf ! handle upmost overflow bit - and $tp, $ovf, $ap - andn $rp, $ovf, $np - or $np, $ap, $ap ! ap=borrow?tp:rp + subccc $ovf, %g0, $ovf ! handle upmost overflow bit ba .Lcopy_g5 sub $num, 8, $cnt .align 16 -.Lcopy_g5: ! copy or in-place refresh - ldx [$ap+0], $t2 - add $ap, 8, $ap +.Lcopy_g5: ! conditional copy + ldx [$tp], $tj + ldx [$rp+0], $t2 stx %g0, [$tp] ! zap add $tp, 8, $tp + movcs %icc, $tj, $t2 stx $t2, [$rp+0] add $rp, 8, $rp brnz $cnt, .Lcopy_g5 diff --git a/crypto/bn/asm/sparcv8.S b/crypto/bn/asm/sparcv8.S index 88c5dc480a76..75d72eb92c74 100644 --- a/crypto/bn/asm/sparcv8.S +++ b/crypto/bn/asm/sparcv8.S @@ -1,19 +1,19 @@ .ident "sparcv8.s, Version 1.4" -.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@openssl.org>" /* * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. + * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved. * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html * ==================================================================== */ /* - * This is my modest contributon to OpenSSL project (see + * This is my modest contribution to OpenSSL project (see * http://www.openssl.org/ for more information about it) and is * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c * module. For updates see http://fy.chalmers.se/~appro/hpe/. @@ -159,12 +159,12 @@ bn_mul_add_words: */ bn_mul_words: cmp %o2,0 - bg,a .L_bn_mul_words_proceeed + bg,a .L_bn_mul_words_proceed ld [%o1],%g2 retl clr %o0 -.L_bn_mul_words_proceeed: +.L_bn_mul_words_proceed: andcc %o2,-4,%g0 bz .L_bn_mul_words_tail clr %o5 @@ -251,12 +251,12 @@ bn_mul_words: */ bn_sqr_words: cmp %o2,0 - bg,a .L_bn_sqr_words_proceeed + bg,a .L_bn_sqr_words_proceed ld [%o1],%g2 retl clr %o0 -.L_bn_sqr_words_proceeed: +.L_bn_sqr_words_proceed: andcc %o2,-4,%g0 bz .L_bn_sqr_words_tail clr %o5 diff --git a/crypto/bn/asm/sparcv8plus.S b/crypto/bn/asm/sparcv8plus.S index 63de1860f285..fe4699b2bdd1 100644 --- a/crypto/bn/asm/sparcv8plus.S +++ b/crypto/bn/asm/sparcv8plus.S @@ -1,19 +1,19 @@ .ident "sparcv8plus.s, Version 1.4" -.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@openssl.org>" /* * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. + * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved. * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html * ==================================================================== */ /* - * This is my modest contributon to OpenSSL project (see + * This is my modest contribution to OpenSSL project (see * http://www.openssl.org/ for more information about it) and is * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c * module. For updates see http://fy.chalmers.se/~appro/hpe/. @@ -52,7 +52,7 @@ * # cd ../.. * # make; make test * - * Q. V8plus achitecture? What kind of beast is that? + * Q. V8plus architecture? What kind of beast is that? * A. Well, it's rather a programming model than an architecture... * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under * special conditions, namely when kernel doesn't preserve upper @@ -71,7 +71,7 @@ * * Q. 64-bit registers under 32-bit kernels? Didn't you just say it * doesn't work? - * A. You can't adress *all* registers as 64-bit wide:-( The catch is + * A. You can't address *all* registers as 64-bit wide:-( The catch is * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully * preserved if you're in a leaf function, i.e. such never calling * any other functions. All functions in this module are leaf and @@ -278,7 +278,7 @@ bn_mul_add_words: */ bn_mul_words: sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_mul_words_proceeed + brgz,a %o2,.L_bn_mul_words_proceed lduw [%o1],%g2 retl clr %o0 @@ -286,7 +286,7 @@ bn_mul_words: nop nop -.L_bn_mul_words_proceeed: +.L_bn_mul_words_proceed: srl %o3,%g0,%o3 ! clruw %o3 andcc %o2,-4,%g0 bz,pn %icc,.L_bn_mul_words_tail @@ -366,7 +366,7 @@ bn_mul_words: */ bn_sqr_words: sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_sqr_words_proceeed + brgz,a %o2,.L_bn_sqr_words_proceed lduw [%o1],%g2 retl clr %o0 @@ -374,7 +374,7 @@ bn_sqr_words: nop nop -.L_bn_sqr_words_proceeed: +.L_bn_sqr_words_proceed: andcc %o2,-4,%g0 nop bz,pn %icc,.L_bn_sqr_words_tail diff --git a/crypto/bn/asm/sparcv9-gf2m.pl b/crypto/bn/asm/sparcv9-gf2m.pl index ab94cd917c41..dcf11a87a18e 100755 --- a/crypto/bn/asm/sparcv9-gf2m.pl +++ b/crypto/bn/asm/sparcv9-gf2m.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -18,6 +25,9 @@ # ~100-230% faster than gcc-generated code and ~35-90% faster than # the pure SPARCv9 code path. +$output = pop; +open STDOUT,">$output"; + $locals=16*8; $tab="%l0"; diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl index d8662878006e..b41903af985f 100755 --- a/crypto/bn/asm/sparcv9-mont.pl +++ b/crypto/bn/asm/sparcv9-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -13,7 +20,7 @@ # for undertaken effort are multiple. First of all, UltraSPARC is not # the whole SPARCv9 universe and other VIS-free implementations deserve # optimized code as much. Secondly, newly introduced UltraSPARC T1, -# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes, +# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths, # such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with # several integrated RSA/DSA accelerator circuits accessible through # kernel driver [only(*)], but having decent user-land software @@ -23,7 +30,7 @@ # instructions... # (*) Engine accessing the driver in question is on my TODO list. -# For reference, acceleator is estimated to give 6 to 10 times +# For reference, accelerator is estimated to give 6 to 10 times # improvement on single-threaded RSA sign. It should be noted # that 6-10x improvement coefficient does not actually mean # something extraordinary in terms of absolute [single-threaded] @@ -42,6 +49,9 @@ # module still have hidden potential [see TODO list there], which is # estimated to be larger than 20%... +$output = pop; +open STDOUT,">$output"; + # int bn_mul_mont( $rp="%i0"; # BN_ULONG *rp, $ap="%i1"; # const BN_ULONG *ap, @@ -50,10 +60,8 @@ $np="%i3"; # const BN_ULONG *np, $n0="%i4"; # const BN_ULONG *n0, $num="%i5"; # int num); -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=128; } +$frame="STACK_FRAME"; +$bias="STACK_BIAS"; $car0="%o0"; $car1="%o1"; @@ -76,6 +84,8 @@ $tpj="%l7"; $fname="bn_mul_mont_int"; $code=<<___; +#include "sparc_arch.h" + .section ".text",#alloc,#execinstr .global $fname @@ -105,7 +115,7 @@ $fname: ld [$np],$car1 ! np[0] sub %o7,$bias,%sp ! alloca ld [$np+4],$npj ! np[1] - be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont + be,pt SIZE_T_CC,.Lbn_sqr_mont mov 12,$j mulx $car0,$mul0,$car0 ! ap[0]*bp[0] @@ -255,7 +265,6 @@ $fname: .Ltail: add $np,$num,$np add $rp,$num,$rp - mov $tp,$ap sub %g0,$num,%o7 ! k=-num ba .Lsub subcc %g0,%g0,%g0 ! clear %icc.c @@ -268,15 +277,14 @@ $fname: add %o7,4,%o7 brnz %o7,.Lsub st %o1,[$i] - subc $car2,0,$car2 ! handle upmost overflow bit - and $tp,$car2,$ap - andn $rp,$car2,$np - or $ap,$np,$ap + subccc $car2,0,$car2 ! handle upmost overflow bit sub %g0,$num,%o7 .Lcopy: - ld [$ap+%o7],%o0 ! copy or in-place refresh + ld [$tp+%o7],%o1 ! conditional copy + ld [$rp+%o7],%o0 st %g0,[$tp+%o7] ! zap tp + movcs %icc,%o1,%o0 st %o0,[$rp+%o7] add %o7,4,%o7 brnz %o7,.Lcopy @@ -485,6 +493,9 @@ $code.=<<___; mulx $npj,$mul1,$acc1 add $tpj,$car1,$car1 ld [$np+$j],$npj ! np[j] + srlx $car1,32,$tmp0 + and $car1,$mask,$car1 + add $tmp0,$sbit,$sbit add $acc0,$car1,$car1 ld [$tp+8],$tpj ! tp[j] add $acc1,$car1,$car1 @@ -601,7 +612,7 @@ $code.=<<___; add $tp,8,$tp .type $fname,#function .size $fname,(.-$fname) -.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" +.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" .align 32 ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl index a14205f2f006..c8f759df9fbd 100755 --- a/crypto/bn/asm/sparcv9a-mont.pl +++ b/crypto/bn/asm/sparcv9a-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -51,21 +58,17 @@ # # Modulo-scheduled inner loops allow to interleave floating point and # integer instructions and minimize Read-After-Write penalties. This -# results in *further* 20-50% perfromance improvement [depending on +# results in *further* 20-50% performance improvement [depending on # key length, more for longer keys] on USI&II cores and 30-80% - on # USIII&IV. +$output = pop; +open STDOUT,">$output"; + $fname="bn_mul_mont_fpu"; -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } - -if ($bits==64) { - $bias=2047; - $frame=192; -} else { - $bias=0; - $frame=128; # 96 rounded up to largest known cache-line -} + +$frame="STACK_FRAME"; +$bias="STACK_BIAS"; $locals=64; # In order to provide for 32-/64-bit ABI duality, I keep integers wider @@ -121,6 +124,8 @@ $nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62"; $ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load $code=<<___; +#include "sparc_arch.h" + .section ".text",#alloc,#execinstr .global $fname @@ -860,14 +865,14 @@ $fname: restore .type $fname,#function .size $fname,(.-$fname) -.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>" +.asciz "Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>" .align 32 ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; # Below substitution makes it possible to compile without demanding -# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I +# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I # dare to do this, because VIS capability is detected at run-time now # and this routine is not called on CPU not capable to execute it. Do # note that fzeros is not the only VIS dependency! Another dependency diff --git a/crypto/bn/asm/via-mont.pl b/crypto/bn/asm/via-mont.pl index c046a514c873..9cf717e84102 100755 --- a/crypto/bn/asm/via-mont.pl +++ b/crypto/bn/asm/via-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -69,7 +76,7 @@ # dsa 1024 bits 0.001346s 0.001595s 742.7 627.0 # dsa 2048 bits 0.004745s 0.005582s 210.7 179.1 # -# Conclusions: +# Conclusions: # - VIA SDK leaves a *lot* of room for improvement (which this # implementation successfully fills:-); # - 'rep montmul' gives up to >3x performance improvement depending on @@ -81,7 +88,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"via-mont.pl"); +$output = pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0]); # int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); $func="bn_mul_mont_padlock"; @@ -203,18 +213,15 @@ $sp=&DWP(28,"esp"); &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit &sbb ("eax",0); - &and ("esi","eax"); - ¬ ("eax"); - &mov ("ebp","edi"); - &and ("ebp","eax"); - &or ("esi","ebp"); # tp=carry?tp:rp &mov ("ecx","edx"); # num - &xor ("edx","edx"); # i=0 + &mov ("edx",0); # i=0 &set_label("copy",8); - &mov ("eax",&DWP(0,"esi","edx",4)); - &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp + &mov ("ebx",&DWP(0,"esi","edx",4)); + &mov ("eax",&DWP(0,"edi","edx",4)); + &mov (&DWP(0,"esi","edx",4),"ecx"); # zap tp + &cmovc ("eax","ebx"); &mov (&DWP(0,"edi","edx",4),"eax"); &lea ("edx",&DWP(1,"edx")); # i++ &loop (&label("copy")); @@ -240,3 +247,5 @@ $sp=&DWP(28,"esp"); &asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>"); &asm_finish(); + +close STDOUT; diff --git a/crypto/bn/asm/vis3-mont.pl b/crypto/bn/asm/vis3-mont.pl index 263ac02b6f45..04833a0c876d 100755 --- a/crypto/bn/asm/vis3-mont.pl +++ b/crypto/bn/asm/vis3-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2012-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -9,7 +16,7 @@ # October 2012. # -# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and +# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and # onward. There are three new instructions used here: umulxhi, # addxc[cc] and initializing store. On T3 RSA private key operations # are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key @@ -18,16 +25,20 @@ # for reference purposes, because T4 has dedicated Montgomery # multiplication and squaring *instructions* that deliver even more. -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } +$output = pop; +open STDOUT,">$output"; + +$frame = "STACK_FRAME"; +$bias = "STACK_BIAS"; + +$code.=<<___; +#include "sparc_arch.h" -$code.=<<___ if ($bits==64); +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif + .section ".text",#alloc,#execinstr ___ @@ -299,23 +310,23 @@ $code.=<<___; sub $anp, $num, $anp sub $rp, $num, $rp - subc $ovf, %g0, $ovf ! handle upmost overflow bit - and $tp, $ovf, $ap - andn $rp, $ovf, $np - or $np, $ap, $ap ! ap=borrow?tp:rp + subccc $ovf, %g0, $ovf ! handle upmost overflow bit ba .Lcopy sub $num, 8, $cnt .align 16 -.Lcopy: ! copy or in-place refresh - ld [$ap+0], $t2 - ld [$ap+4], $t3 - add $ap, 8, $ap +.Lcopy: ! conditional copy + ld [$tp+0], $t0 + ld [$tp+4], $t1 + ld [$rp+0], $t2 + ld [$rp+4], $t3 stx %g0, [$tp] ! zap add $tp, 8, $tp stx %g0, [$anp] ! zap stx %g0, [$anp+8] add $anp, 16, $anp + movcs %icc, $t0, $t2 + movcs %icc, $t1, $t3 st $t3, [$rp+0] ! flip order st $t2, [$rp+4] add $rp, 8, $rp @@ -333,7 +344,7 @@ ___ # Purpose of these subroutines is to explicitly encode VIS instructions, # so that one can compile the module without having to specify VIS -# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. # Idea is to reserve for option to produce "universal" binary and let # programmer detect if current CPU is VIS capable at run-time. sub unvis3 { diff --git a/crypto/bn/asm/x86-gf2m.pl b/crypto/bn/asm/x86-gf2m.pl index b57953027298..d03efcc75023 100755 --- a/crypto/bn/asm/x86-gf2m.pl +++ b/crypto/bn/asm/x86-gf2m.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -36,7 +43,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386"); +$output = pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386"); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -142,7 +152,7 @@ $R="mm0"; &xor ($a4,$a2); # a2=a4^a2^a4 &mov (&DWP(5*4,"esp"),$a1); # a1^a4 &xor ($a4,$a1); # a1^a2^a4 - &sar (@i[1],31); # broardcast 30th bit + &sar (@i[1],31); # broadcast 30th bit &and ($lo,$b); &mov (&DWP(6*4,"esp"),$a2); # a2^a4 &and (@i[1],$b); @@ -311,3 +321,5 @@ if ($sse2) { &asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); &asm_finish(); + +close STDOUT; diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl index 1c4003efc20a..7ba2133ac9c3 100755 --- a/crypto/bn/asm/x86-mont.pl +++ b/crypto/bn/asm/x86-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -30,7 +37,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],$0); +$output = pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0]); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -68,7 +78,7 @@ $frame=32; # size of above frame rounded up to 16n &lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2)) &neg ("edi"); - # minimize cache contention by arraning 2K window between stack + # minimize cache contention by arranging 2K window between stack # pointer and ap argument [np is also position sensitive vector, # but it's assumed to be near ap, as it's allocated at ~same # time]. @@ -84,7 +94,9 @@ $frame=32; # size of above frame rounded up to 16n &and ("ebp",-64); # align to cache line - # Some OSes, *cough*-dows, insist on stack being "wired" to + # An OS-agnostic version of __chkstk. + # + # Some OSes (Windows) insist on stack being "wired" to # physical memory in strictly sequential manner, i.e. if stack # allocation spans two pages, then reference to farmost one can # be punishable by SEGV. But page walking can do good even on @@ -289,7 +301,7 @@ if (0) { &xor ("eax","eax"); # signal "not fast enough [yet]" &jmp (&label("just_leave")); # While the below code provides competitive performance for - # all key lengthes on modern Intel cores, it's still more + # all key lengths on modern Intel cores, it's still more # than 10% slower for 4096-bit key elsewhere:-( "Competitive" # means compared to the original integer-only assembler. # 512-bit RSA sign is better by ~40%, but that's about all @@ -592,16 +604,18 @@ $sbit=$num; &jge (&label("sub")); &sbb ("eax",0); # handle upmost overflow bit - &and ($tp,"eax"); - ¬ ("eax"); - &mov ($np,$rp); - &and ($np,"eax"); - &or ($tp,$np); # tp=carry?tp:rp - -&set_label("copy",16); # copy or in-place refresh - &mov ("eax",&DWP(0,$tp,$num,4)); - &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i] + &mov ("edx",-1); + &xor ("edx","eax"); + &jmp (&label("copy")); + +&set_label("copy",16); # conditional copy + &mov ($tp,&DWP($frame,"esp",$num,4)); + &mov ($np,&DWP(0,$rp,$num,4)); &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector + &and ($tp,"eax"); + &and ($np,"edx"); + &or ($np,$tp); + &mov (&DWP(0,$rp,$num,4),$np); &dec ($num); &jge (&label("copy")); @@ -613,3 +627,5 @@ $sbit=$num; &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); &asm_finish(); + +close STDOUT; diff --git a/crypto/bn/asm/x86.pl b/crypto/bn/asm/x86.pl deleted file mode 100644 index 1bc4f1bb2747..000000000000 --- a/crypto/bn/asm/x86.pl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/local/bin/perl - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; - -require("x86/mul_add.pl"); -require("x86/mul.pl"); -require("x86/sqr.pl"); -require("x86/div.pl"); -require("x86/add.pl"); -require("x86/sub.pl"); -require("x86/comba.pl"); - -&asm_init($ARGV[0],$0); - -&bn_mul_add_words("bn_mul_add_words"); -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_div_words("bn_div_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_mul_comba("bn_mul_comba8",8); -&bn_mul_comba("bn_mul_comba4",4); -&bn_sqr_comba("bn_sqr_comba8",8); -&bn_sqr_comba("bn_sqr_comba4",4); - -&asm_finish(); - diff --git a/crypto/bn/asm/x86/add.pl b/crypto/bn/asm/x86/add.pl deleted file mode 100644 index 0b5cf583e37f..000000000000 --- a/crypto/bn/asm/x86/add.pl +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_add_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -1; diff --git a/crypto/bn/asm/x86/comba.pl b/crypto/bn/asm/x86/comba.pl deleted file mode 100644 index 22912536293d..000000000000 --- a/crypto/bn/asm/x86/comba.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub mul_add_c - { - local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("mul a[$ai]*b[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - &mul("edx"); - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a - &mov("eax",&wparam(0)) if $pos > 0; # load r[] - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a - } - -sub sqr_add_c - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - } - -sub sqr_add_c2 - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$a,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add("eax","eax"); - ### - &adc("edx","edx"); - ### - &adc($c2,0); - &add($c0,"eax"); - &adc($c1,"edx"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - &adc($c2,0); - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); - ### - } - -sub bn_mul_comba - { - local($name,$num)=@_; - local($a,$b,$c0,$c1,$c2); - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($tot,$end); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $b="edi"; - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - &push("esi"); - &mov($a,&wparam(1)); - &push("edi"); - &mov($b,&wparam(2)); - &push("ebp"); - &push("ebx"); - - &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("################## Calculate word $i"); - - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($j+1) == $end) - { - $v=1; - $v=2 if (($i+1) == $tot); - } - else - { $v=0; } - if (($j+1) != $end) - { - $na=($ai-1); - $nb=($bi+1); - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } -#printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; - &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); - if ($v) - { - &comment("saved r[$i]"); - # &mov("eax",&wparam(0)); - # &mov(&DWP($i*4,"eax","",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &comment("save r[$i]"); - # &mov("eax",&wparam(0)); - &mov(&DWP($i*4,"eax","",0),$c0); - - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - -sub bn_sqr_comba - { - local($name,$num)=@_; - local($r,$a,$c0,$c1,$c2)=@_; - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($b,$tot,$end,$half); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $r="edi"; - - &push("esi"); - &push("edi"); - &push("ebp"); - &push("ebx"); - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &xor($c0,$c0); - &xor($c1,$c1); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("############### Calculate word $i"); - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($ai-1) < ($bi+1)) - { - $v=1; - $v=2 if ($i+1) == $tot; - } - else - { $v=0; } - if (!$v) - { - $na=$ai-1; - $nb=$bi+1; - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } - if ($ai == $bi) - { - &sqr_add_c($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - else - { - &sqr_add_c2($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - if ($v) - { - &comment("saved r[$i]"); - #&mov(&DWP($i*4,$r,"",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - last; - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &mov(&DWP($i*4,$r,"",0),$c0); - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - -1; diff --git a/crypto/bn/asm/x86/div.pl b/crypto/bn/asm/x86/div.pl deleted file mode 100644 index 0e90152caa95..000000000000 --- a/crypto/bn/asm/x86/div.pl +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_div_words - { - local($name)=@_; - - &function_begin($name,""); - &mov("edx",&wparam(0)); # - &mov("eax",&wparam(1)); # - &mov("ebx",&wparam(2)); # - &div("ebx"); - &function_end($name); - } -1; diff --git a/crypto/bn/asm/x86/f b/crypto/bn/asm/x86/f deleted file mode 100644 index 22e411222431..000000000000 --- a/crypto/bn/asm/x86/f +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - diff --git a/crypto/bn/asm/x86/mul.pl b/crypto/bn/asm/x86/mul.pl deleted file mode 100644 index 674cb9b05512..000000000000 --- a/crypto/bn/asm/x86/mul.pl +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_mul_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ecx"; - $r="edi"; - $c="esi"; - $num="ebp"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - &mov($w,&wparam(3)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("mw_finish")); - - &set_label("mw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a,"",0)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); - - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jz(&label("mw_finish")); - &jmp(&label("mw_loop")); - - &set_label("mw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jnz(&label("mw_finish2")); - &jmp(&label("mw_end")); - - &set_label("mw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); - &mov($c,"edx"); # c= H(t); - &dec($num) if ($i != 7-1); - &jz(&label("mw_end")) if ($i != 7-1); - } - &set_label("mw_end",0); - &mov("eax",$c); - - &function_end($name); - } - -1; diff --git a/crypto/bn/asm/x86/mul_add.pl b/crypto/bn/asm/x86/mul_add.pl deleted file mode 100644 index 61830d3a906a..000000000000 --- a/crypto/bn/asm/x86/mul_add.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_mul_add_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); - - &set_label("maw_loop",0); - - &mov(&swtmp(0),"ecx"); # - - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a,"",0)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+= *r - &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &add("eax",$c); # L(t)+=c - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &mov("ecx",&swtmp(0)); # - &add($a,32); - &add($r,32); - &sub("ecx",8); - &jnz(&label("maw_loop")); - - &set_label("maw_finish",0); - &mov("ecx",&wparam(2)); # get num - &and("ecx",7); - &jnz(&label("maw_finish2")); # helps branch prediction - &jmp(&label("maw_end")); - - &set_label("maw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &add("eax",$c); - &adc("edx",0); # H(t)+=carry - &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - &jz(&label("maw_end")) if ($i != 7-1); - } - &set_label("maw_end",0); - &mov("eax",$c); - - &pop("ecx"); # clear variable from - - &function_end($name); - } - -1; diff --git a/crypto/bn/asm/x86/sqr.pl b/crypto/bn/asm/x86/sqr.pl deleted file mode 100644 index 1f90993cf689..000000000000 --- a/crypto/bn/asm/x86/sqr.pl +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_sqr_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $r="esi"; - $a="edi"; - $num="ebx"; - - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("sw_finish")); - - &set_label("sw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - &mov("eax",&DWP($i,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*2,$r,"",0),"eax"); # - &mov(&DWP($i*2+4,$r,"",0),"edx");# - } - - &comment(""); - &add($a,32); - &add($r,64); - &sub($num,8); - &jnz(&label("sw_loop")); - - &set_label("sw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jz(&label("sw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*8,$r,"",0),"eax"); # - &dec($num) if ($i != 7-1); - &mov(&DWP($i*8+4,$r,"",0),"edx"); - &jz(&label("sw_end")) if ($i != 7-1); - } - &set_label("sw_end",0); - - &function_end($name); - } - -1; diff --git a/crypto/bn/asm/x86/sub.pl b/crypto/bn/asm/x86/sub.pl deleted file mode 100644 index 837b0e1b078d..000000000000 --- a/crypto/bn/asm/x86/sub.pl +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_sub_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -1; diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c index 1729b479d43e..d38f33716477 100644 --- a/crypto/bn/asm/x86_64-gcc.c +++ b/crypto/bn/asm/x86_64-gcc.c @@ -1,3 +1,12 @@ +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + #include "../bn_lcl.h" #if !(defined(__GNUC__) && __GNUC__>=2) # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ @@ -5,7 +14,7 @@ /*- * x86_64 BIGNUM accelerator version 0.1, December 2002. * - * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL + * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL * project. * * Rights for redistribution and usage in source and binary forms are @@ -111,7 +120,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG c1 = 0; if (num <= 0) - return (c1); + return c1; while (num & ~3) { mul_add(rp[0], ap[0], w, c1); @@ -133,7 +142,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, return c1; } - return (c1); + return c1; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -141,7 +150,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) BN_ULONG c1 = 0; if (num <= 0) - return (c1); + return c1; while (num & ~3) { mul(rp[0], ap[0], w, c1); @@ -161,7 +170,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) return c1; mul(rp[2], ap[2], w, c1); } - return (c1); + return c1; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -216,9 +225,10 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" " lea 1(%2),%2 \n" - " loop 1b \n" - " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), - "+r"(i) + " dec %1 \n" + " jnz 1b \n" + " sbbq %0,%0 \n" + :"=&r" (ret), "+c"(n), "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -242,9 +252,10 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" " lea 1(%2),%2 \n" - " loop 1b \n" - " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), - "+r"(i) + " dec %1 \n" + " jnz 1b \n" + " sbbq %0,%0 \n" + :"=&r" (ret), "+c"(n), "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -259,7 +270,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) int c = 0; if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; for (;;) { t1 = a[0]; @@ -298,7 +309,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) b += 4; r += 4; } - return (c); + return c; } # endif diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl index 42bbec2fb7ef..0fd6e985d7b0 100755 --- a/crypto/bn/asm/x86_64-gf2m.pl +++ b/crypto/bn/asm/x86_64-gf2m.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -31,7 +38,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; ($lo,$hi)=("%rax","%rdx"); $a=$lo; @@ -47,7 +54,9 @@ $code.=<<___; .type _mul_1x1,\@abi-omnipotent .align 16 _mul_1x1: +.cfi_startproc sub \$128+8,%rsp +.cfi_adjust_cfa_offset 128+8 mov \$-1,$a1 lea ($a,$a),$i0 shr \$3,$a1 @@ -59,7 +68,7 @@ _mul_1x1: sar \$63,$i0 # broadcast 62nd bit lea (,$a1,4),$a4 and $b,$a - sar \$63,$i1 # boardcast 61st bit + sar \$63,$i1 # broadcast 61st bit mov $a,$hi # $a is $lo shl \$63,$lo and $b,$i0 @@ -153,8 +162,10 @@ $code.=<<___; xor $i1,$hi add \$128+8,%rsp +.cfi_adjust_cfa_offset -128-8 ret .Lend_mul_1x1: +.cfi_endproc .size _mul_1x1,.-_mul_1x1 ___ @@ -167,8 +178,10 @@ $code.=<<___; .type bn_GF2m_mul_2x2,\@abi-omnipotent .align 16 bn_GF2m_mul_2x2: - mov OPENSSL_ia32cap_P(%rip),%rax - bt \$33,%rax +.cfi_startproc + mov %rsp,%rax + mov OPENSSL_ia32cap_P(%rip),%r10 + bt \$33,%r10 jnc .Lvanilla_mul_2x2 movq $a1,%xmm0 @@ -203,6 +216,7 @@ $code.=<<___; .align 16 .Lvanilla_mul_2x2: lea -8*17(%rsp),%rsp +.cfi_adjust_cfa_offset 8*17 ___ $code.=<<___ if ($win64); mov `8*17+40`(%rsp),$b0 @@ -211,10 +225,15 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %r14,8*10(%rsp) +.cfi_rel_offset %r14,8*10 mov %r13,8*11(%rsp) +.cfi_rel_offset %r13,8*11 mov %r12,8*12(%rsp) +.cfi_rel_offset %r12,8*12 mov %rbp,8*13(%rsp) +.cfi_rel_offset %rbp,8*13 mov %rbx,8*14(%rsp) +.cfi_rel_offset %rbx,8*14 .Lbody_mul_2x2: mov $rp,32(%rsp) # save the arguments mov $a1,40(%rsp) @@ -262,10 +281,15 @@ $code.=<<___; mov $lo,8(%rbp) mov 8*10(%rsp),%r14 +.cfi_restore %r14 mov 8*11(%rsp),%r13 +.cfi_restore %r13 mov 8*12(%rsp),%r12 +.cfi_restore %r12 mov 8*13(%rsp),%rbp +.cfi_restore %rbp mov 8*14(%rsp),%rbx +.cfi_restore %rbx ___ $code.=<<___ if ($win64); mov 8*15(%rsp),%rdi @@ -273,8 +297,11 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea 8*17(%rsp),%rsp +.cfi_adjust_cfa_offset -8*17 +.Lepilogue_mul_2x2: ret .Lend_mul_2x2: +.cfi_endproc .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" .align 16 @@ -305,13 +332,19 @@ se_handler: pushfq sub \$64,%rsp - mov 152($context),%rax # pull context->Rsp + mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip lea .Lbody_mul_2x2(%rip),%r10 cmp %r10,%rbx # context->Rip<"prologue" label jb .Lin_prologue + mov 152($context),%rax # pull context->Rsp + + lea .Lepilogue_mul_2x2(%rip),%r10 + cmp %r10,%rbx # context->Rip>="epilogue" label + jae .Lin_prologue + mov 8*10(%rax),%r14 # mimic epilogue mov 8*11(%rax),%r13 mov 8*12(%rax),%r12 @@ -328,8 +361,9 @@ se_handler: mov %r13,224($context) # restore context->R13 mov %r14,232($context) # restore context->R14 -.Lin_prologue: lea 8*17(%rax),%rax + +.Lin_prologue: mov %rax,152($context) # restore context->Rsp mov 40($disp),%rdi # disp->ContextRecord diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 80492d8e6381..c051135e30dd 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -50,7 +57,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` @@ -97,8 +104,10 @@ $code=<<___; .type bn_mul_mont,\@function,6 .align 16 bn_mul_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$3,${num}d jnz .Lmul_enter cmp \$8,${num}d @@ -117,11 +126,17 @@ $code.=<<___; .align 16 .Lmul_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -129,7 +144,9 @@ $code.=<<___; neg $num # restore $num and \$-1024,%r10 # minimize TLB usage - # Some OSes, *cough*-dows, insist on stack being "wired" to + # An OS-agnostic version of __chkstk. + # + # Some OSes (Windows) insist on stack being "wired" to # physical memory in strictly sequential manner, i.e. if stack # allocation spans two pages, then reference to farmost one can # be punishable by SEGV. But page walking can do good even on @@ -152,6 +169,7 @@ $code.=<<___; .Lmul_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: mov $bp,%r12 # reassign $bp ___ @@ -293,45 +311,54 @@ $code.=<<___; xor $i,$i # i=0 and clear CF! mov (%rsp),%rax # tp[0] - lea (%rsp),$ap # borrow ap for tp mov $num,$j # j=num - jmp .Lsub + .align 16 .Lsub: sbb ($np,$i,8),%rax mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 8($ap,$i,8),%rax # tp[i+1] + mov 8(%rsp,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit + mov \$-1,%rbx + xor %rax,%rbx # not %rax xor $i,$i - and %rax,$ap - not %rax - mov $rp,$np - and %rax,$np mov $num,$j # j=num - or $np,$ap # ap=borrow?tp:rp -.align 16 -.Lcopy: # copy or in-place refresh - mov ($ap,$i,8),%rax - mov $i,(%rsp,$i,8) # zap temporary vector - mov %rax,($rp,$i,8) # rp[i]=tp[i] + +.Lcopy: # conditional copy + mov ($rp,$i,8),%rcx + mov (%rsp,$i,8),%rdx + and %rbx,%rcx + and %rax,%rdx + mov $num,(%rsp,$i,8) # zap temporary vector + or %rcx,%rdx + mov %rdx,($rp,$i,8) # rp[i]=tp[i] lea 1($i),$i sub \$1,$j jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont,.-bn_mul_mont ___ {{{ @@ -341,8 +368,10 @@ $code.=<<___; .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -352,11 +381,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -380,6 +415,7 @@ $code.=<<___; .Lmul4x_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov %rdx,%r12 # reassign $bp @@ -686,10 +722,10 @@ ___ my @ri=("%rax","%rdx",$m0,$m1); $code.=<<___; mov 16(%rsp,$num,8),$rp # restore $rp + lea -4($num),$j mov 0(%rsp),@ri[0] # tp[0] - pxor %xmm0,%xmm0 mov 8(%rsp),@ri[1] # tp[1] - shr \$2,$num # num/=4 + shr \$2,$j # j=num/4-1 lea (%rsp),$ap # borrow ap for tp xor $i,$i # i=0 and clear CF! @@ -697,9 +733,7 @@ $code.=<<___; mov 16($ap),@ri[2] # tp[2] mov 24($ap),@ri[3] # tp[3] sbb 8($np),@ri[1] - lea -1($num),$j # j=num/4-1 - jmp .Lsub4x -.align 16 + .Lsub4x: mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] @@ -714,7 +748,7 @@ $code.=<<___; mov 56($ap,$i,8),@ri[3] sbb 40($np,$i,8),@ri[1] lea 4($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub4x mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] @@ -726,48 +760,58 @@ $code.=<<___; sbb \$0,@ri[0] # handle upmost overflow bit mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$ap - not @ri[0] - mov $rp,$np - and @ri[0],$np - lea -1($num),$j - or $np,$ap # ap=borrow?tp:rp - - movdqu ($ap),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,($rp) + pxor %xmm0,%xmm0 + movq @ri[0],%xmm4 + pcmpeqd %xmm5,%xmm5 + pshufd \$0,%xmm4,%xmm4 + mov $num,$j + pxor %xmm4,%xmm5 + shr \$2,$j # j=num/4 + xor %eax,%eax # i=0 + jmp .Lcopy4x .align 16 -.Lcopy4x: # copy or in-place refresh - movdqu 16($ap,$i),%xmm2 - movdqu 32($ap,$i),%xmm1 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) - movdqa %xmm0,32(%rsp,$i) - movdqu %xmm1,32($rp,$i) - lea 32($i),$i +.Lcopy4x: # conditional copy + movdqa (%rsp,%rax),%xmm1 + movdqu ($rp,%rax),%xmm2 + pand %xmm4,%xmm1 + pand %xmm5,%xmm2 + movdqa 16(%rsp,%rax),%xmm3 + movdqa %xmm0,(%rsp,%rax) + por %xmm2,%xmm1 + movdqu 16($rp,%rax),%xmm2 + movdqu %xmm1,($rp,%rax) + pand %xmm4,%xmm3 + pand %xmm5,%xmm2 + movdqa %xmm0,16(%rsp,%rax) + por %xmm2,%xmm3 + movdqu %xmm3,16($rp,%rax) + lea 32(%rax),%rax dec $j jnz .Lcopy4x - - shl \$2,$num - movdqu 16($ap,$i),%xmm2 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) ___ } $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi, 8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont,.-bn_mul4x_mont ___ }}} @@ -795,14 +839,22 @@ $code.=<<___; .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lsqr8x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lsqr8x_prologue: mov ${num}d,%r10d @@ -858,6 +910,7 @@ bn_sqr8x_mont: mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lsqr8x_body: movq $nptr, %xmm2 # save pointer to modulus @@ -927,6 +980,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lsqr8x_cond_copy .align 32 @@ -956,14 +1010,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lsqr8x_epilogue: ret +.cfi_endproc .size bn_sqr8x_mont,.-bn_sqr8x_mont ___ }}} @@ -975,14 +1037,22 @@ $code.=<<___; .type bn_mulx4x_mont,\@function,6 .align 32 bn_mulx4x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -1028,6 +1098,7 @@ bn_mulx4x_mont: mov $n0, 24(%rsp) # save *n0 mov $rp, 32(%rsp) # save $rp mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 mov $num,48(%rsp) # inner counter jmp .Lmulx4x_body @@ -1277,6 +1348,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lmulx4x_cond_copy .align 32 @@ -1306,14 +1378,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont,.-bn_mulx4x_mont ___ }}} @@ -1392,12 +1472,12 @@ sqr_handler: mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label - cmp %r10,%rbx # context->Rip<.Lsqr_body + cmp %r10,%rbx # context->Rip<.Lsqr_prologue jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # body label - cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue + cmp %r10,%rbx # context->Rip<.Lsqr_body jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 42178e455a98..ad6e8ada3ce7 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -35,7 +42,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` @@ -86,8 +93,10 @@ $code=<<___; .type bn_mul_mont_gather5,\@function,6 .align 64 bn_mul_mont_gather5: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$7,${num}d jnz .Lmul_enter ___ @@ -101,11 +110,17 @@ $code.=<<___; .Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -113,7 +128,9 @@ $code.=<<___; neg $num # restore $num and \$-1024,%r10 # minimize TLB usage - # Some OSes, *cough*-dows, insist on stack being "wired" to + # An OS-agnostic version of __chkstk. + # + # Some OSes (Windows) insist on stack being "wired" to # physical memory in strictly sequential manner, i.e. if stack # allocation spans two pages, then reference to farmost one can # be punishable by SEGV. But page walking can do good even on @@ -136,6 +153,7 @@ $code.=<<___; lea .Linc(%rip),%r10 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: lea 128($bp),%r12 # reassign $bp (+size optimization) @@ -401,38 +419,48 @@ $code.=<<___; mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit + mov \$-1,%rbx + xor %rax,%rbx xor $i,$i - and %rax,$ap - not %rax - mov $rp,$np - and %rax,$np mov $num,$j # j=num - or $np,$ap # ap=borrow?tp:rp -.align 16 -.Lcopy: # copy or in-place refresh - mov ($ap,$i,8),%rax + +.Lcopy: # conditional copy + mov ($rp,$i,8),%rcx + mov (%rsp,$i,8),%rdx + and %rbx,%rcx + and %rax,%rdx mov $i,(%rsp,$i,8) # zap temporary vector - mov %rax,($rp,$i,8) # rp[i]=tp[i] + or %rcx,%rdx + mov %rdx,($rp,$i,8) # rp[i]=tp[i] lea 1($i),$i sub \$1,$j jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 ___ {{{ @@ -442,8 +470,10 @@ $code.=<<___; .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -453,11 +483,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmul4x_prologue: .byte 0x67 @@ -513,22 +549,32 @@ $code.=<<___; neg $num mov %rax,40(%rsp) +.cfi_cfa_expression %rsp+40,deref,+8 .Lmul4x_body: call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .type mul4x_internal,\@abi-omnipotent @@ -1040,7 +1086,7 @@ my $bptr="%rdx"; # const void *table, my $nptr="%rcx"; # const BN_ULONG *nptr, my $n0 ="%r8"; # const BN_ULONG *n0); my $num ="%r9"; # int num, has to be divisible by 8 - # int pwr + # int pwr my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); my @A0=("%r10","%r11"); @@ -1052,7 +1098,9 @@ $code.=<<___; .type bn_power5,\@function,6 .align 32 bn_power5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d @@ -1062,11 +1110,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpower5_prologue: shl \$3,${num}d # convert $num to bytes @@ -1117,7 +1171,7 @@ $code.=<<___; ja .Lpwr_page_walk .Lpwr_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -1131,6 +1185,7 @@ $code.=<<___; # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpower5_body: movq $rptr,%xmm1 # save $rptr, used in sqr8x movq $nptr,%xmm2 # save $nptr @@ -1157,16 +1212,25 @@ $code.=<<___; call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpower5_epilogue: ret +.cfi_endproc .size bn_power5,.-bn_power5 .globl bn_sqr8x_internal @@ -2026,7 +2090,7 @@ __bn_post4x_internal: jnz .Lsqr4x_sub mov $num,%r10 # prepare for back-to-back call - neg $num # restore $num + neg $num # restore $num ret .size __bn_post4x_internal,.-__bn_post4x_internal ___ @@ -2046,14 +2110,22 @@ bn_from_montgomery: .type bn_from_mont8x,\@function,6 .align 32 bn_from_mont8x: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lfrom_prologue: shl \$3,${num}d # convert $num to bytes @@ -2118,6 +2190,7 @@ bn_from_mont8x: # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lfrom_body: mov $num,%r11 lea 48(%rsp),%rax @@ -2161,7 +2234,6 @@ $code.=<<___ if ($addx); pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 @@ -2173,11 +2245,12 @@ $code.=<<___; pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 .Lfrom_mont_zero: + mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 movdqa %xmm0,16*0(%rax) movdqa %xmm0,16*1(%rax) movdqa %xmm0,16*2(%rax) @@ -2188,14 +2261,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lfrom_epilogue: ret +.cfi_endproc .size bn_from_mont8x,.-bn_from_mont8x ___ } @@ -2208,14 +2289,22 @@ $code.=<<___; .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -2249,7 +2338,7 @@ bn_mulx4x_mont_gather5: mov \$0,%r10 cmovc %r10,%r11 sub %r11,%rbp -.Lmulx4xsp_done: +.Lmulx4xsp_done: and \$-64,%rbp # ensure alignment mov %rsp,%r11 sub %rbp,%r11 @@ -2281,21 +2370,31 @@ bn_mulx4x_mont_gather5: # mov $n0, 32(%rsp) # save *n0 mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lmulx4x_body: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 .type mulx4x_internal,\@abi-omnipotent @@ -2323,7 +2422,7 @@ my $N=$STRIDE/4; # should match cache line size $code.=<<___; movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 - lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton) + lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimization) lea 128($bp),$bptr # size optimization pshufd \$0,%xmm5,%xmm5 # broadcast index @@ -2673,14 +2772,22 @@ $code.=<<___; .type bn_powerx5,\@function,6 .align 32 bn_powerx5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lpowerx5_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpowerx5_prologue: shl \$3,${num}d # convert $num to bytes @@ -2731,7 +2838,7 @@ bn_powerx5: ja .Lpwrx_page_walk .Lpwrx_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -2752,6 +2859,7 @@ bn_powerx5: movq $bptr,%xmm4 mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpowerx5_body: call __bn_sqrx8x_internal @@ -2774,17 +2882,26 @@ bn_powerx5: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpowerx5_epilogue: ret +.cfi_endproc .size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal @@ -3668,8 +3785,8 @@ mul_handler: jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label + lea (%rsi,%r10),%r10 # beginning of body label + cmp %r10,%rbx # context->Rip<body label jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h deleted file mode 100644 index 633d1b1f6013..000000000000 --- a/crypto/bn/bn.h +++ /dev/null @@ -1,951 +0,0 @@ -/* crypto/bn/bn.h */ -/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * Portions of the attached software ("Contribution") are developed by - * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. - * - * The Contribution is licensed pursuant to the Eric Young open source - * license provided above. - * - * The binary polynomial arithmetic software is originally written by - * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. - * - */ - -#ifndef HEADER_BN_H -# define HEADER_BN_H - -# include <limits.h> -# include <openssl/e_os2.h> -# ifndef OPENSSL_NO_FP_API -# include <stdio.h> /* FILE */ -# endif -# include <openssl/ossl_typ.h> -# include <openssl/crypto.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * These preprocessor symbols control various aspects of the bignum headers - * and library code. They're not defined by any "normal" configuration, as - * they are intended for development and testing purposes. NB: defining all - * three can be useful for debugging application code as well as openssl - * itself. BN_DEBUG - turn on various debugging alterations to the bignum - * code BN_DEBUG_RAND - uses random poisoning of unused words to trip up - * mismanagement of bignum internals. You must also define BN_DEBUG. - */ -/* #define BN_DEBUG */ -/* #define BN_DEBUG_RAND */ - -# ifndef OPENSSL_SMALL_FOOTPRINT -# define BN_MUL_COMBA -# define BN_SQR_COMBA -# define BN_RECURSION -# endif - -/* - * This next option uses the C libraries (2 word)/(1 word) function. If it is - * not defined, I use my C version (which is slower). The reason for this - * flag is that when the particular C compiler library routine is used, and - * the library is linked with a different compiler, the library is missing. - * This mostly happens when the library is built with gcc and then linked - * using normal cc. This would be a common occurrence because gcc normally - * produces code that is 2 times faster than system compilers for the big - * number stuff. For machines with only one compiler (or shared libraries), - * this should be on. Again this in only really a problem on machines using - * "long long's", are 32bit, and are not using my assembler code. - */ -# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \ - defined(OPENSSL_SYS_WIN32) || defined(linux) -# ifndef BN_DIV2W -# define BN_DIV2W -# endif -# endif - -/* - * assuming long is 64bit - this is the DEC Alpha unsigned long long is only - * 64 bits :-(, don't define BN_LLONG for the DEC Alpha - */ -# ifdef SIXTY_FOUR_BIT_LONG -# define BN_ULLONG unsigned long long -# define BN_ULONG unsigned long -# define BN_LONG long -# define BN_BITS 128 -# define BN_BYTES 8 -# define BN_BITS2 64 -# define BN_BITS4 32 -# define BN_MASK (0xffffffffffffffffffffffffffffffffLL) -# define BN_MASK2 (0xffffffffffffffffL) -# define BN_MASK2l (0xffffffffL) -# define BN_MASK2h (0xffffffff00000000L) -# define BN_MASK2h1 (0xffffffff80000000L) -# define BN_TBIT (0x8000000000000000L) -# define BN_DEC_CONV (10000000000000000000UL) -# define BN_DEC_FMT1 "%lu" -# define BN_DEC_FMT2 "%019lu" -# define BN_DEC_NUM 19 -# define BN_HEX_FMT1 "%lX" -# define BN_HEX_FMT2 "%016lX" -# endif - -/* - * This is where the long long data type is 64 bits, but long is 32. For - * machines where there are 64bit registers, this is the mode to use. IRIX, - * on R4000 and above should use this mode, along with the relevant assembler - * code :-). Do NOT define BN_LLONG. - */ -# ifdef SIXTY_FOUR_BIT -# undef BN_LLONG -# undef BN_ULLONG -# define BN_ULONG unsigned long long -# define BN_LONG long long -# define BN_BITS 128 -# define BN_BYTES 8 -# define BN_BITS2 64 -# define BN_BITS4 32 -# define BN_MASK2 (0xffffffffffffffffLL) -# define BN_MASK2l (0xffffffffL) -# define BN_MASK2h (0xffffffff00000000LL) -# define BN_MASK2h1 (0xffffffff80000000LL) -# define BN_TBIT (0x8000000000000000LL) -# define BN_DEC_CONV (10000000000000000000ULL) -# define BN_DEC_FMT1 "%llu" -# define BN_DEC_FMT2 "%019llu" -# define BN_DEC_NUM 19 -# define BN_HEX_FMT1 "%llX" -# define BN_HEX_FMT2 "%016llX" -# endif - -# ifdef THIRTY_TWO_BIT -# ifdef BN_LLONG -# if defined(_WIN32) && !defined(__GNUC__) -# define BN_ULLONG unsigned __int64 -# define BN_MASK (0xffffffffffffffffI64) -# else -# define BN_ULLONG unsigned long long -# define BN_MASK (0xffffffffffffffffLL) -# endif -# endif -# define BN_ULONG unsigned int -# define BN_LONG int -# define BN_BITS 64 -# define BN_BYTES 4 -# define BN_BITS2 32 -# define BN_BITS4 16 -# define BN_MASK2 (0xffffffffL) -# define BN_MASK2l (0xffff) -# define BN_MASK2h1 (0xffff8000L) -# define BN_MASK2h (0xffff0000L) -# define BN_TBIT (0x80000000L) -# define BN_DEC_CONV (1000000000L) -# define BN_DEC_FMT1 "%u" -# define BN_DEC_FMT2 "%09u" -# define BN_DEC_NUM 9 -# define BN_HEX_FMT1 "%X" -# define BN_HEX_FMT2 "%08X" -# endif - -# define BN_DEFAULT_BITS 1280 - -# define BN_FLG_MALLOCED 0x01 -# define BN_FLG_STATIC_DATA 0x02 - -/* - * avoid leaking exponent information through timing, - * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime, - * BN_div() will call BN_div_no_branch, - * BN_mod_inverse() will call BN_mod_inverse_no_branch. - */ -# define BN_FLG_CONSTTIME 0x04 - -# ifdef OPENSSL_NO_DEPRECATED -/* deprecated name for the flag */ -# define BN_FLG_EXP_CONSTTIME BN_FLG_CONSTTIME -/* - * avoid leaking exponent information through timings - * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) - */ -# endif - -# ifndef OPENSSL_NO_DEPRECATED -# define BN_FLG_FREE 0x8000 - /* used for debuging */ -# endif -# define BN_set_flags(b,n) ((b)->flags|=(n)) -# define BN_get_flags(b,n) ((b)->flags&(n)) - -/* - * get a clone of a BIGNUM with changed flags, for *temporary* use only (the - * two BIGNUMs cannot not be used in parallel!) - */ -# define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \ - (dest)->top=(b)->top, \ - (dest)->dmax=(b)->dmax, \ - (dest)->neg=(b)->neg, \ - (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \ - | ((b)->flags & ~BN_FLG_MALLOCED) \ - | BN_FLG_STATIC_DATA \ - | (n))) - -/* Already declared in ossl_typ.h */ -# if 0 -typedef struct bignum_st BIGNUM; -/* Used for temp variables (declaration hidden in bn_lcl.h) */ -typedef struct bignum_ctx BN_CTX; -typedef struct bn_blinding_st BN_BLINDING; -typedef struct bn_mont_ctx_st BN_MONT_CTX; -typedef struct bn_recp_ctx_st BN_RECP_CTX; -typedef struct bn_gencb_st BN_GENCB; -# endif - -struct bignum_st { - BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit - * chunks. */ - int top; /* Index of last used d +1. */ - /* The next are internal book keeping for bn_expand. */ - int dmax; /* Size of the d array. */ - int neg; /* one if the number is negative */ - int flags; -}; - -/* Used for montgomery multiplication */ -struct bn_mont_ctx_st { - int ri; /* number of bits in R */ - BIGNUM RR; /* used to convert to montgomery form */ - BIGNUM N; /* The modulus */ - BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 (Ni is only - * stored for bignum algorithm) */ - BN_ULONG n0[2]; /* least significant word(s) of Ni; (type - * changed with 0.9.9, was "BN_ULONG n0;" - * before) */ - int flags; -}; - -/* - * Used for reciprocal division/mod functions It cannot be shared between - * threads - */ -struct bn_recp_ctx_st { - BIGNUM N; /* the divisor */ - BIGNUM Nr; /* the reciprocal */ - int num_bits; - int shift; - int flags; -}; - -/* Used for slow "generation" functions. */ -struct bn_gencb_st { - unsigned int ver; /* To handle binary (in)compatibility */ - void *arg; /* callback-specific data */ - union { - /* if(ver==1) - handles old style callbacks */ - void (*cb_1) (int, int, void *); - /* if(ver==2) - new callback style */ - int (*cb_2) (int, int, BN_GENCB *); - } cb; -}; -/* Wrapper function to make using BN_GENCB easier, */ -int BN_GENCB_call(BN_GENCB *cb, int a, int b); -/* Macro to populate a BN_GENCB structure with an "old"-style callback */ -# define BN_GENCB_set_old(gencb, callback, cb_arg) { \ - BN_GENCB *tmp_gencb = (gencb); \ - tmp_gencb->ver = 1; \ - tmp_gencb->arg = (cb_arg); \ - tmp_gencb->cb.cb_1 = (callback); } -/* Macro to populate a BN_GENCB structure with a "new"-style callback */ -# define BN_GENCB_set(gencb, callback, cb_arg) { \ - BN_GENCB *tmp_gencb = (gencb); \ - tmp_gencb->ver = 2; \ - tmp_gencb->arg = (cb_arg); \ - tmp_gencb->cb.cb_2 = (callback); } - -# define BN_prime_checks 0 /* default: select number of iterations based - * on the size of the number */ - -/* - * number of Miller-Rabin iterations for an error rate of less than 2^-80 for - * random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook of - * Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996]; - * original paper: Damgaard, Landrock, Pomerance: Average case error - * estimates for the strong probable prime test. -- Math. Comp. 61 (1993) - * 177-194) - */ -# define BN_prime_checks_for_size(b) ((b) >= 1300 ? 2 : \ - (b) >= 850 ? 3 : \ - (b) >= 650 ? 4 : \ - (b) >= 550 ? 5 : \ - (b) >= 450 ? 6 : \ - (b) >= 400 ? 7 : \ - (b) >= 350 ? 8 : \ - (b) >= 300 ? 9 : \ - (b) >= 250 ? 12 : \ - (b) >= 200 ? 15 : \ - (b) >= 150 ? 18 : \ - /* b >= 100 */ 27) - -# define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) - -/* Note that BN_abs_is_word didn't work reliably for w == 0 until 0.9.8 */ -# define BN_abs_is_word(a,w) ((((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) || \ - (((w) == 0) && ((a)->top == 0))) -# define BN_is_zero(a) ((a)->top == 0) -# define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg) -# define BN_is_word(a,w) (BN_abs_is_word((a),(w)) && (!(w) || !(a)->neg)) -# define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) - -# define BN_one(a) (BN_set_word((a),1)) -# define BN_zero_ex(a) \ - do { \ - BIGNUM *_tmp_bn = (a); \ - _tmp_bn->top = 0; \ - _tmp_bn->neg = 0; \ - } while(0) -# ifdef OPENSSL_NO_DEPRECATED -# define BN_zero(a) BN_zero_ex(a) -# else -# define BN_zero(a) (BN_set_word((a),0)) -# endif - -const BIGNUM *BN_value_one(void); -char *BN_options(void); -BN_CTX *BN_CTX_new(void); -# ifndef OPENSSL_NO_DEPRECATED -void BN_CTX_init(BN_CTX *c); -# endif -void BN_CTX_free(BN_CTX *c); -void BN_CTX_start(BN_CTX *ctx); -BIGNUM *BN_CTX_get(BN_CTX *ctx); -void BN_CTX_end(BN_CTX *ctx); -int BN_rand(BIGNUM *rnd, int bits, int top, int bottom); -int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom); -int BN_rand_range(BIGNUM *rnd, const BIGNUM *range); -int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range); -int BN_num_bits(const BIGNUM *a); -int BN_num_bits_word(BN_ULONG); -BIGNUM *BN_new(void); -void BN_init(BIGNUM *); -void BN_clear_free(BIGNUM *a); -BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b); -void BN_swap(BIGNUM *a, BIGNUM *b); -BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret); -int BN_bn2bin(const BIGNUM *a, unsigned char *to); -BIGNUM *BN_mpi2bn(const unsigned char *s, int len, BIGNUM *ret); -int BN_bn2mpi(const BIGNUM *a, unsigned char *to); -int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); -int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx); -/** BN_set_negative sets sign of a BIGNUM - * \param b pointer to the BIGNUM object - * \param n 0 if the BIGNUM b should be positive and a value != 0 otherwise - */ -void BN_set_negative(BIGNUM *b, int n); -/** BN_is_negative returns 1 if the BIGNUM is negative - * \param a pointer to the BIGNUM object - * \return 1 if a < 0 and 0 otherwise - */ -# define BN_is_negative(a) ((a)->neg != 0) - -int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, - BN_CTX *ctx); -# define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx)) -int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx); -int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, - BN_CTX *ctx); -int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *m); -int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, - BN_CTX *ctx); -int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *m); -int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, - BN_CTX *ctx); -int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx); -int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx); -int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m); -int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, - BN_CTX *ctx); -int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m); - -BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w); -BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w); -int BN_mul_word(BIGNUM *a, BN_ULONG w); -int BN_add_word(BIGNUM *a, BN_ULONG w); -int BN_sub_word(BIGNUM *a, BN_ULONG w); -int BN_set_word(BIGNUM *a, BN_ULONG w); -BN_ULONG BN_get_word(const BIGNUM *a); - -int BN_cmp(const BIGNUM *a, const BIGNUM *b); -void BN_free(BIGNUM *a); -int BN_is_bit_set(const BIGNUM *a, int n); -int BN_lshift(BIGNUM *r, const BIGNUM *a, int n); -int BN_lshift1(BIGNUM *r, const BIGNUM *a); -int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); - -int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx); -int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); -int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, - BN_MONT_CTX *in_mont); -int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); -int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1, - const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, - BN_CTX *ctx, BN_MONT_CTX *m_ctx); -int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx); - -int BN_mask_bits(BIGNUM *a, int n); -# ifndef OPENSSL_NO_FP_API -int BN_print_fp(FILE *fp, const BIGNUM *a); -# endif -# ifdef HEADER_BIO_H -int BN_print(BIO *fp, const BIGNUM *a); -# else -int BN_print(void *fp, const BIGNUM *a); -# endif -int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx); -int BN_rshift(BIGNUM *r, const BIGNUM *a, int n); -int BN_rshift1(BIGNUM *r, const BIGNUM *a); -void BN_clear(BIGNUM *a); -BIGNUM *BN_dup(const BIGNUM *a); -int BN_ucmp(const BIGNUM *a, const BIGNUM *b); -int BN_set_bit(BIGNUM *a, int n); -int BN_clear_bit(BIGNUM *a, int n); -char *BN_bn2hex(const BIGNUM *a); -char *BN_bn2dec(const BIGNUM *a); -int BN_hex2bn(BIGNUM **a, const char *str); -int BN_dec2bn(BIGNUM **a, const char *str); -int BN_asc2bn(BIGNUM **a, const char *str); -int BN_gcd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); -int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); /* returns - * -2 for - * error */ -BIGNUM *BN_mod_inverse(BIGNUM *ret, - const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx); -BIGNUM *BN_mod_sqrt(BIGNUM *ret, - const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx); - -void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords); - -/* Deprecated versions */ -# ifndef OPENSSL_NO_DEPRECATED -BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, - const BIGNUM *add, const BIGNUM *rem, - void (*callback) (int, int, void *), void *cb_arg); -int BN_is_prime(const BIGNUM *p, int nchecks, - void (*callback) (int, int, void *), - BN_CTX *ctx, void *cb_arg); -int BN_is_prime_fasttest(const BIGNUM *p, int nchecks, - void (*callback) (int, int, void *), BN_CTX *ctx, - void *cb_arg, int do_trial_division); -# endif /* !defined(OPENSSL_NO_DEPRECATED) */ - -/* Newer versions */ -int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, const BIGNUM *add, - const BIGNUM *rem, BN_GENCB *cb); -int BN_is_prime_ex(const BIGNUM *p, int nchecks, BN_CTX *ctx, BN_GENCB *cb); -int BN_is_prime_fasttest_ex(const BIGNUM *p, int nchecks, BN_CTX *ctx, - int do_trial_division, BN_GENCB *cb); - -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - const BIGNUM *Xp, const BIGNUM *Xp1, - const BIGNUM *Xp2, const BIGNUM *e, BN_CTX *ctx, - BN_GENCB *cb); -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, BIGNUM *Xp1, - BIGNUM *Xp2, const BIGNUM *Xp, const BIGNUM *e, - BN_CTX *ctx, BN_GENCB *cb); - -BN_MONT_CTX *BN_MONT_CTX_new(void); -void BN_MONT_CTX_init(BN_MONT_CTX *ctx); -int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - BN_MONT_CTX *mont, BN_CTX *ctx); -# define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\ - (r),(a),&((mont)->RR),(mont),(ctx)) -int BN_from_montgomery(BIGNUM *r, const BIGNUM *a, - BN_MONT_CTX *mont, BN_CTX *ctx); -void BN_MONT_CTX_free(BN_MONT_CTX *mont); -int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx); -BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from); -BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, - const BIGNUM *mod, BN_CTX *ctx); - -/* BN_BLINDING flags */ -# define BN_BLINDING_NO_UPDATE 0x00000001 -# define BN_BLINDING_NO_RECREATE 0x00000002 - -BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod); -void BN_BLINDING_free(BN_BLINDING *b); -int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx); -int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); -int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); -int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); -int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, - BN_CTX *); -# ifndef OPENSSL_NO_DEPRECATED -unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); -void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); -# endif -CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *); -unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); -void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); -BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, - const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, - int (*bn_mod_exp) (BIGNUM *r, - const BIGNUM *a, - const BIGNUM *p, - const BIGNUM *m, - BN_CTX *ctx, - BN_MONT_CTX *m_ctx), - BN_MONT_CTX *m_ctx); - -# ifndef OPENSSL_NO_DEPRECATED -void BN_set_params(int mul, int high, int low, int mont); -int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */ -# endif - -void BN_RECP_CTX_init(BN_RECP_CTX *recp); -BN_RECP_CTX *BN_RECP_CTX_new(void); -void BN_RECP_CTX_free(BN_RECP_CTX *recp); -int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *rdiv, BN_CTX *ctx); -int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, - BN_RECP_CTX *recp, BN_CTX *ctx); -int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx); -int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, - BN_RECP_CTX *recp, BN_CTX *ctx); - -# ifndef OPENSSL_NO_EC2M - -/* - * Functions for arithmetic over binary polynomials represented by BIGNUMs. - * The BIGNUM::neg property of BIGNUMs representing binary polynomials is - * ignored. Note that input arguments are not const so that their bit arrays - * can be expanded to the appropriate size if needed. - */ - -/* - * r = a + b - */ -int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -# define BN_GF2m_sub(r, a, b) BN_GF2m_add(r, a, b) -/* - * r=a mod p - */ -int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p); -/* r = (a * b) mod p */ -int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *p, BN_CTX *ctx); -/* r = (a * a) mod p */ -int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); -/* r = (1 / b) mod p */ -int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx); -/* r = (a / b) mod p */ -int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *p, BN_CTX *ctx); -/* r = (a ^ b) mod p */ -int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *p, BN_CTX *ctx); -/* r = sqrt(a) mod p */ -int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - BN_CTX *ctx); -/* r^2 + r = a mod p */ -int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - BN_CTX *ctx); -# define BN_GF2m_cmp(a, b) BN_ucmp((a), (b)) -/*- - * Some functions allow for representation of the irreducible polynomials - * as an unsigned int[], say p. The irreducible f(t) is then of the form: - * t^p[0] + t^p[1] + ... + t^p[k] - * where m = p[0] > p[1] > ... > p[k] = 0. - */ -/* r = a mod p */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]); -/* r = (a * b) mod p */ -int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const int p[], BN_CTX *ctx); -/* r = (a * a) mod p */ -int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], - BN_CTX *ctx); -/* r = (1 / b) mod p */ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[], - BN_CTX *ctx); -/* r = (a / b) mod p */ -int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const int p[], BN_CTX *ctx); -/* r = (a ^ b) mod p */ -int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const int p[], BN_CTX *ctx); -/* r = sqrt(a) mod p */ -int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, - const int p[], BN_CTX *ctx); -/* r^2 + r = a mod p */ -int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, - const int p[], BN_CTX *ctx); -int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); -int BN_GF2m_arr2poly(const int p[], BIGNUM *a); - -# endif - -/* - * faster mod functions for the 'NIST primes' 0 <= a < p^2 - */ -int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); -int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); -int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); -int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); -int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); - -const BIGNUM *BN_get0_nist_prime_192(void); -const BIGNUM *BN_get0_nist_prime_224(void); -const BIGNUM *BN_get0_nist_prime_256(void); -const BIGNUM *BN_get0_nist_prime_384(void); -const BIGNUM *BN_get0_nist_prime_521(void); - -/* library internal functions */ - -# define bn_expand(a,bits) \ - ( \ - bits > (INT_MAX - BN_BITS2 + 1) ? \ - NULL \ - : \ - (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) ? \ - (a) \ - : \ - bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2) \ - ) - -# define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) -BIGNUM *bn_expand2(BIGNUM *a, int words); -# ifndef OPENSSL_NO_DEPRECATED -BIGNUM *bn_dup_expand(const BIGNUM *a, int words); /* unused */ -# endif - -/*- - * Bignum consistency macros - * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from - * bignum data after direct manipulations on the data. There is also an - * "internal" macro, bn_check_top(), for verifying that there are no leading - * zeroes. Unfortunately, some auditing is required due to the fact that - * bn_fix_top() has become an overabused duct-tape because bignum data is - * occasionally passed around in an inconsistent state. So the following - * changes have been made to sort this out; - * - bn_fix_top()s implementation has been moved to bn_correct_top() - * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and - * bn_check_top() is as before. - * - if BN_DEBUG *is* defined; - * - bn_check_top() tries to pollute unused words even if the bignum 'top' is - * consistent. (ed: only if BN_DEBUG_RAND is defined) - * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything. - * The idea is to have debug builds flag up inconsistent bignums when they - * occur. If that occurs in a bn_fix_top(), we examine the code in question; if - * the use of bn_fix_top() was appropriate (ie. it follows directly after code - * that manipulates the bignum) it is converted to bn_correct_top(), and if it - * was not appropriate, we convert it permanently to bn_check_top() and track - * down the cause of the bug. Eventually, no internal code should be using the - * bn_fix_top() macro. External applications and libraries should try this with - * their own code too, both in terms of building against the openssl headers - * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it - * defined. This not only improves external code, it provides more test - * coverage for openssl's own code. - */ - -# ifdef BN_DEBUG - -/* We only need assert() when debugging */ -# include <assert.h> - -# ifdef BN_DEBUG_RAND -/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */ -# ifndef RAND_pseudo_bytes -int RAND_pseudo_bytes(unsigned char *buf, int num); -# define BN_DEBUG_TRIX -# endif -# define bn_pollute(a) \ - do { \ - const BIGNUM *_bnum1 = (a); \ - if(_bnum1->top < _bnum1->dmax) { \ - unsigned char _tmp_char; \ - /* We cast away const without the compiler knowing, any \ - * *genuinely* constant variables that aren't mutable \ - * wouldn't be constructed with top!=dmax. */ \ - BN_ULONG *_not_const; \ - memcpy(&_not_const, &_bnum1->d, sizeof(BN_ULONG*)); \ - /* Debug only - safe to ignore error return */ \ - RAND_pseudo_bytes(&_tmp_char, 1); \ - memset((unsigned char *)(_not_const + _bnum1->top), _tmp_char, \ - (_bnum1->dmax - _bnum1->top) * sizeof(BN_ULONG)); \ - } \ - } while(0) -# ifdef BN_DEBUG_TRIX -# undef RAND_pseudo_bytes -# endif -# else -# define bn_pollute(a) -# endif -# define bn_check_top(a) \ - do { \ - const BIGNUM *_bnum2 = (a); \ - if (_bnum2 != NULL) { \ - assert((_bnum2->top == 0) || \ - (_bnum2->d[_bnum2->top - 1] != 0)); \ - bn_pollute(_bnum2); \ - } \ - } while(0) - -# define bn_fix_top(a) bn_check_top(a) - -# define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) -# define bn_wcheck_size(bn, words) \ - do { \ - const BIGNUM *_bnum2 = (bn); \ - assert((words) <= (_bnum2)->dmax && (words) >= (_bnum2)->top); \ - /* avoid unused variable warning with NDEBUG */ \ - (void)(_bnum2); \ - } while(0) - -# else /* !BN_DEBUG */ - -# define bn_pollute(a) -# define bn_check_top(a) -# define bn_fix_top(a) bn_correct_top(a) -# define bn_check_size(bn, bits) -# define bn_wcheck_size(bn, words) - -# endif - -# define bn_correct_top(a) \ - { \ - BN_ULONG *ftl; \ - int tmp_top = (a)->top; \ - if (tmp_top > 0) \ - { \ - for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \ - if (*(ftl--)) break; \ - (a)->top = tmp_top; \ - } \ - if ((a)->top == 0) \ - (a)->neg = 0; \ - bn_pollute(a); \ - } - -BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, - BN_ULONG w); -BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); -void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); -BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); -BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - int num); -BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - int num); - -/* Primes from RFC 2409 */ -BIGNUM *get_rfc2409_prime_768(BIGNUM *bn); -BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn); - -/* Primes from RFC 3526 */ -BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn); -BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn); -BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn); -BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn); -BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn); -BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn); - -int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom); - -/* BEGIN ERROR CODES */ -/* - * The following lines are auto generated by the script mkerr.pl. Any changes - * made after this point may be overwritten when the script is next run. - */ -void ERR_load_BN_strings(void); - -/* Error codes for the BN functions. */ - -/* Function codes. */ -# define BN_F_BNRAND 127 -# define BN_F_BN_BLINDING_CONVERT_EX 100 -# define BN_F_BN_BLINDING_CREATE_PARAM 128 -# define BN_F_BN_BLINDING_INVERT_EX 101 -# define BN_F_BN_BLINDING_NEW 102 -# define BN_F_BN_BLINDING_UPDATE 103 -# define BN_F_BN_BN2DEC 104 -# define BN_F_BN_BN2HEX 105 -# define BN_F_BN_CTX_GET 116 -# define BN_F_BN_CTX_NEW 106 -# define BN_F_BN_CTX_START 129 -# define BN_F_BN_DIV 107 -# define BN_F_BN_DIV_NO_BRANCH 138 -# define BN_F_BN_DIV_RECP 130 -# define BN_F_BN_EXP 123 -# define BN_F_BN_EXPAND2 108 -# define BN_F_BN_EXPAND_INTERNAL 120 -# define BN_F_BN_GF2M_MOD 131 -# define BN_F_BN_GF2M_MOD_EXP 132 -# define BN_F_BN_GF2M_MOD_MUL 133 -# define BN_F_BN_GF2M_MOD_SOLVE_QUAD 134 -# define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135 -# define BN_F_BN_GF2M_MOD_SQR 136 -# define BN_F_BN_GF2M_MOD_SQRT 137 -# define BN_F_BN_LSHIFT 145 -# define BN_F_BN_MOD_EXP2_MONT 118 -# define BN_F_BN_MOD_EXP_MONT 109 -# define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124 -# define BN_F_BN_MOD_EXP_MONT_WORD 117 -# define BN_F_BN_MOD_EXP_RECP 125 -# define BN_F_BN_MOD_EXP_SIMPLE 126 -# define BN_F_BN_MOD_INVERSE 110 -# define BN_F_BN_MOD_INVERSE_NO_BRANCH 139 -# define BN_F_BN_MOD_LSHIFT_QUICK 119 -# define BN_F_BN_MOD_MUL_RECIPROCAL 111 -# define BN_F_BN_MOD_SQRT 121 -# define BN_F_BN_MPI2BN 112 -# define BN_F_BN_NEW 113 -# define BN_F_BN_RAND 114 -# define BN_F_BN_RAND_RANGE 122 -# define BN_F_BN_RSHIFT 146 -# define BN_F_BN_USUB 115 - -/* Reason codes. */ -# define BN_R_ARG2_LT_ARG3 100 -# define BN_R_BAD_RECIPROCAL 101 -# define BN_R_BIGNUM_TOO_LONG 114 -# define BN_R_BITS_TOO_SMALL 118 -# define BN_R_CALLED_WITH_EVEN_MODULUS 102 -# define BN_R_DIV_BY_ZERO 103 -# define BN_R_ENCODING_ERROR 104 -# define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105 -# define BN_R_INPUT_NOT_REDUCED 110 -# define BN_R_INVALID_LENGTH 106 -# define BN_R_INVALID_RANGE 115 -# define BN_R_INVALID_SHIFT 119 -# define BN_R_NOT_A_SQUARE 111 -# define BN_R_NOT_INITIALIZED 107 -# define BN_R_NO_INVERSE 108 -# define BN_R_NO_SOLUTION 116 -# define BN_R_P_IS_NOT_PRIME 112 -# define BN_R_TOO_MANY_ITERATIONS 113 -# define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 - -#ifdef __cplusplus -} -#endif -#endif diff --git a/crypto/bn/bn.mul b/crypto/bn/bn.mul deleted file mode 100644 index 9728870d38ae..000000000000 --- a/crypto/bn/bn.mul +++ /dev/null @@ -1,19 +0,0 @@ -We need - -* bn_mul_comba8 -* bn_mul_comba4 -* bn_mul_normal -* bn_mul_recursive - -* bn_sqr_comba8 -* bn_sqr_comba4 -bn_sqr_normal -> BN_sqr -* bn_sqr_recursive - -* bn_mul_low_recursive -* bn_mul_low_normal -* bn_mul_high - -* bn_mul_part_recursive # symetric but not power of 2 - -bn_mul_asymetric_recursive # uneven, but do the chop up. diff --git a/crypto/bn/bn_add.c b/crypto/bn/bn_add.c index 2f3d11044990..f2736b8f6d41 100644 --- a/crypto/bn/bn_add.c +++ b/crypto/bn/bn_add.c @@ -1,119 +1,90 @@ -/* crypto/bn/bn_add.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" -/* r can == a or b */ +/* signed add of b to a. */ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - const BIGNUM *tmp; - int a_neg = a->neg, ret; + int ret, r_neg, cmp_res; bn_check_top(a); bn_check_top(b); - /*- - * a + b a+b - * a + -b a-b - * -a + b b-a - * -a + -b -(a+b) - */ - if (a_neg ^ b->neg) { - /* only one is negative */ - if (a_neg) { - tmp = a; - a = b; - b = tmp; + if (a->neg == b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = b->neg; + ret = BN_usub(r, b, a); + } else { + r_neg = 0; + BN_zero(r); + ret = 1; } + } - /* we are now a - b */ + r->neg = r_neg; + bn_check_top(r); + return ret; +} - if (BN_ucmp(a, b) < 0) { - if (!BN_usub(r, b, a)) - return (0); - r->neg = 1; +/* signed sub of b from a. */ +int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int ret, r_neg, cmp_res; + + bn_check_top(a); + bn_check_top(b); + + if (a->neg != b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = !b->neg; + ret = BN_usub(r, b, a); } else { - if (!BN_usub(r, a, b)) - return (0); - r->neg = 0; + r_neg = 0; + BN_zero(r); + ret = 1; } - return (1); } - ret = BN_uadd(r, a, b); - r->neg = a_neg; + r->neg = r_neg; bn_check_top(r); return ret; } -/* unsigned add of b to a */ +/* unsigned add of b to a, r can be equal to a or b. */ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max, min, dif; - BN_ULONG *ap, *bp, *rp, carry, t1, t2; - const BIGNUM *tmp; + const BN_ULONG *ap, *bp; + BN_ULONG *rp, carry, t1, t2; bn_check_top(a); bn_check_top(b); if (a->top < b->top) { + const BIGNUM *tmp; + tmp = a; a = b; b = tmp; @@ -134,29 +105,17 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) carry = bn_add_words(rp, ap, bp, min); rp += min; ap += min; - bp += min; - if (carry) { - while (dif) { - dif--; - t1 = *(ap++); - t2 = (t1 + 1) & BN_MASK2; - *(rp++) = t2; - if (t2) { - carry = 0; - break; - } - } - if (carry) { - /* carry != 0 => dif == 0 */ - *rp = 1; - r->top++; - } + while (dif) { + dif--; + t1 = *(ap++); + t2 = (t1 + carry) & BN_MASK2; + *(rp++) = t2; + carry &= (t2 == 0); } - if (dif && rp != ap) - while (dif--) - /* copy remaining words if ap != rp */ - *(rp++) = *(ap++); + *rp = carry; + r->top += carry; + r->neg = 0; bn_check_top(r); return 1; @@ -166,11 +125,8 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max, min, dif; - register BN_ULONG t1, t2, *ap, *bp, *rp; - int i, carry; -#if defined(IRIX_CC_BUG) && !defined(LINT) - int dummy; -#endif + BN_ULONG t1, t2, borrow, *rp; + const BN_ULONG *ap, *bp; bn_check_top(a); bn_check_top(b); @@ -181,133 +137,35 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) if (dif < 0) { /* hmm... should not be happening */ BNerr(BN_F_BN_USUB, BN_R_ARG2_LT_ARG3); - return (0); + return 0; } if (bn_wexpand(r, max) == NULL) - return (0); + return 0; ap = a->d; bp = b->d; rp = r->d; -#if 1 - carry = 0; - for (i = min; i != 0; i--) { - t1 = *(ap++); - t2 = *(bp++); - if (carry) { - carry = (t1 <= t2); - t1 = (t1 - t2 - 1) & BN_MASK2; - } else { - carry = (t1 < t2); - t1 = (t1 - t2) & BN_MASK2; - } -# if defined(IRIX_CC_BUG) && !defined(LINT) - dummy = t1; -# endif - *(rp++) = t1 & BN_MASK2; - } -#else - carry = bn_sub_words(rp, ap, bp, min); + borrow = bn_sub_words(rp, ap, bp, min); ap += min; - bp += min; rp += min; -#endif - if (carry) { /* subtracted */ - if (!dif) - /* error: a < b */ - return 0; - while (dif) { - dif--; - t1 = *(ap++); - t2 = (t1 - 1) & BN_MASK2; - *(rp++) = t2; - if (t1) - break; - } - } -#if 0 - memcpy(rp, ap, sizeof(*rp) * (max - i)); -#else - if (rp != ap) { - for (;;) { - if (!dif--) - break; - rp[0] = ap[0]; - if (!dif--) - break; - rp[1] = ap[1]; - if (!dif--) - break; - rp[2] = ap[2]; - if (!dif--) - break; - rp[3] = ap[3]; - rp += 4; - ap += 4; - } - } -#endif - - r->top = max; - r->neg = 0; - bn_correct_top(r); - return (1); -} - -int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) -{ - int max; - int add = 0, neg = 0; - const BIGNUM *tmp; - - bn_check_top(a); - bn_check_top(b); - /*- - * a - b a-b - * a - -b a+b - * -a - b -(a+b) - * -a - -b b-a - */ - if (a->neg) { - if (b->neg) { - tmp = a; - a = b; - b = tmp; - } else { - add = 1; - neg = 1; - } - } else { - if (b->neg) { - add = 1; - neg = 0; - } + while (dif) { + dif--; + t1 = *(ap++); + t2 = (t1 - borrow) & BN_MASK2; + *(rp++) = t2; + borrow &= (t1 == 0); } - if (add) { - if (!BN_uadd(r, a, b)) - return (0); - r->neg = neg; - return (1); - } + while (max && *--rp == 0) + max--; - /* We are actually doing a - b :-) */ + r->top = max; + r->neg = 0; + bn_pollute(r); - max = (a->top > b->top) ? a->top : b->top; - if (bn_wexpand(r, max) == NULL) - return (0); - if (BN_ucmp(a, b) < 0) { - if (!BN_usub(r, b, a)) - return (0); - r->neg = 1; - } else { - if (!BN_usub(r, a, b)) - return (0); - r->neg = 0; - } - bn_check_top(r); - return (1); + return 1; } + diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 03a33cffe5ee..729b2480acd1 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -1,69 +1,15 @@ -/* crypto/bn/bn_asm.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - -#include <stdio.h> #include <assert.h> -#include "cryptlib.h" +#include <openssl/crypto.h> +#include "internal/cryptlib.h" #include "bn_lcl.h" #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) @@ -75,7 +21,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, assert(num >= 0); if (num <= 0) - return (c1); + return c1; # ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { @@ -95,7 +41,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, num--; } - return (c1); + return c1; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -104,7 +50,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) - return (c1); + return c1; # ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { @@ -123,7 +69,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) rp++; num--; } - return (c1); + return c1; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -162,7 +108,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, assert(num >= 0); if (num <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; bl = LBITS(w); bh = HBITS(w); @@ -184,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, rp++; num--; } - return (c); + return c; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -194,7 +140,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; bl = LBITS(w); bh = HBITS(w); @@ -216,7 +162,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) rp++; num--; } - return (carry); + return carry; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -264,7 +210,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) int i, count = 2; if (d == 0) - return (BN_MASK2); + return BN_MASK2; i = BN_num_bits_word(d); assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); @@ -318,7 +264,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) l = (l & BN_MASK2l) << BN_BITS4; } ret |= q; - return (ret); + return ret; } #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ @@ -330,7 +276,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; # ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { @@ -361,7 +307,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return ((BN_ULONG)ll); + return (BN_ULONG)ll; } #else /* !BN_LLONG */ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, @@ -371,7 +317,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; c = 0; # ifndef OPENSSL_SMALL_FOOTPRINT @@ -418,7 +364,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return ((BN_ULONG)c); + return (BN_ULONG)c; } #endif /* !BN_LLONG */ @@ -430,7 +376,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; #ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { @@ -471,7 +417,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return (c); + return c; } #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) @@ -1005,13 +951,13 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, /* hmm... is it faster just to do a multiply? */ # undef bn_sqr_comba4 +# undef bn_sqr_comba8 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t[8]; bn_sqr_normal(r, a, 4, t); } -# undef bn_sqr_comba8 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t[16]; diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c index d448daa3c77c..450cdfb34866 100644 --- a/crypto/bn/bn_blind.c +++ b/crypto/bn/bn_blind.c @@ -1,116 +1,14 @@ -/* crypto/bn/bn_blind.c */ -/* ==================================================================== - * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. * - */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include <openssl/opensslconf.h> +#include "internal/cryptlib.h" #include "bn_lcl.h" #define BN_BLINDING_COUNTER 32 @@ -120,16 +18,13 @@ struct bn_blinding_st { BIGNUM *Ai; BIGNUM *e; BIGNUM *mod; /* just a reference */ -#ifndef OPENSSL_NO_DEPRECATED - unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; used - * only by crypto/rsa/rsa_eay.c, rsa_lib.c */ -#endif - CRYPTO_THREADID tid; + CRYPTO_THREAD_ID tid; int counter; unsigned long flags; BN_MONT_CTX *m_ctx; int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); + CRYPTO_RWLOCK *lock; }; BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) @@ -138,15 +33,25 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) bn_check_top(mod); - if ((ret = (BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL) { + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } - memset(ret, 0, sizeof(BN_BLINDING)); + + ret->lock = CRYPTO_THREAD_lock_new(); + if (ret->lock == NULL) { + BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE); + OPENSSL_free(ret); + return NULL; + } + + BN_BLINDING_set_current_thread(ret); + if (A != NULL) { if ((ret->A = BN_dup(A)) == NULL) goto err; } + if (Ai != NULL) { if ((ret->Ai = BN_dup(Ai)) == NULL) goto err; @@ -155,6 +60,7 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) /* save a copy of mod in the BN_BLINDING structure */ if ((ret->mod = BN_dup(mod)) == NULL) goto err; + if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) BN_set_flags(ret->mod, BN_FLG_CONSTTIME); @@ -164,27 +70,23 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) * use. */ ret->counter = -1; - CRYPTO_THREADID_current(&ret->tid); - return (ret); + + return ret; + err: - if (ret != NULL) - BN_BLINDING_free(ret); - return (NULL); + BN_BLINDING_free(ret); + return NULL; } void BN_BLINDING_free(BN_BLINDING *r) { if (r == NULL) return; - - if (r->A != NULL) - BN_free(r->A); - if (r->Ai != NULL) - BN_free(r->Ai); - if (r->e != NULL) - BN_free(r->e); - if (r->mod != NULL) - BN_free(r->mod); + BN_free(r->A); + BN_free(r->Ai); + BN_free(r->e); + BN_free(r->mod); + CRYPTO_THREAD_lock_free(r->lock); OPENSSL_free(r); } @@ -206,17 +108,22 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL)) goto err; } else if (!(b->flags & BN_BLINDING_NO_UPDATE)) { - if (!BN_mod_mul(b->A, b->A, b->A, b->mod, ctx)) - goto err; - if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx)) - goto err; + if (b->m_ctx != NULL) { + if (!bn_mul_mont_fixed_top(b->Ai, b->Ai, b->Ai, b->m_ctx, ctx) + || !bn_mul_mont_fixed_top(b->A, b->A, b->A, b->m_ctx, ctx)) + goto err; + } else { + if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx) + || !BN_mod_mul(b->A, b->A, b->A, b->mod, ctx)) + goto err; + } } ret = 1; err: if (b->counter == BN_BLINDING_COUNTER) b->counter = 0; - return (ret); + return ret; } int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) @@ -232,22 +139,22 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx) if ((b->A == NULL) || (b->Ai == NULL)) { BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED); - return (0); + return 0; } if (b->counter == -1) /* Fresh blinding, doesn't need updating. */ b->counter = 0; else if (!BN_BLINDING_update(b, ctx)) - return (0); + return 0; - if (r != NULL) { - if (!BN_copy(r, b->Ai)) - ret = 0; - } + if (r != NULL && (BN_copy(r, b->Ai) == NULL)) + return 0; - if (!BN_mod_mul(n, n, b->A, b->mod, ctx)) - ret = 0; + if (b->m_ctx != NULL) + ret = BN_mod_mul_montgomery(n, n, b->A, b->m_ctx, ctx); + else + ret = BN_mod_mul(n, n, b->A, b->mod, ctx); return ret; } @@ -264,35 +171,53 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, bn_check_top(n); - if (r != NULL) - ret = BN_mod_mul(n, n, r, b->mod, ctx); - else { - if (b->Ai == NULL) { - BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED); - return (0); + if (r == NULL && (r = b->Ai) == NULL) { + BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED); + return 0; + } + + if (b->m_ctx != NULL) { + /* ensure that BN_mod_mul_montgomery takes pre-defined path */ + if (n->dmax >= r->top) { + size_t i, rtop = r->top, ntop = n->top; + BN_ULONG mask; + + for (i = 0; i < rtop; i++) { + mask = (BN_ULONG)0 - ((i - ntop) >> (8 * sizeof(i) - 1)); + n->d[i] &= mask; + } + mask = (BN_ULONG)0 - ((rtop - ntop) >> (8 * sizeof(ntop) - 1)); + /* always true, if (rtop >= ntop) n->top = r->top; */ + n->top = (int)(rtop & ~mask) | (ntop & mask); + n->flags |= (BN_FLG_FIXED_TOP & ~mask); } - ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx); + ret = BN_mod_mul_montgomery(n, n, r, b->m_ctx, ctx); + } else { + ret = BN_mod_mul(n, n, r, b->mod, ctx); } bn_check_top(n); - return (ret); + return ret; +} + +int BN_BLINDING_is_current_thread(BN_BLINDING *b) +{ + return CRYPTO_THREAD_compare_id(CRYPTO_THREAD_get_current_id(), b->tid); } -#ifndef OPENSSL_NO_DEPRECATED -unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) +void BN_BLINDING_set_current_thread(BN_BLINDING *b) { - return b->thread_id; + b->tid = CRYPTO_THREAD_get_current_id(); } -void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n) +int BN_BLINDING_lock(BN_BLINDING *b) { - b->thread_id = n; + return CRYPTO_THREAD_write_lock(b->lock); } -#endif -CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b) +int BN_BLINDING_unlock(BN_BLINDING *b) { - return &b->tid; + return CRYPTO_THREAD_unlock(b->lock); } unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) @@ -332,8 +257,7 @@ BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, goto err; if (e != NULL) { - if (ret->e != NULL) - BN_free(ret->e); + BN_free(ret->e); ret->e = BN_dup(e); } if (ret->e == NULL) @@ -345,38 +269,41 @@ BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, ret->m_ctx = m_ctx; do { - if (!BN_rand_range(ret->A, ret->mod)) + int rv; + if (!BN_priv_rand_range(ret->A, ret->mod)) goto err; - if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL) { - /* - * this should almost never happen for good RSA keys - */ - unsigned long error = ERR_peek_last_error(); - if (ERR_GET_REASON(error) == BN_R_NO_INVERSE) { - if (retry_counter-- == 0) { - BNerr(BN_F_BN_BLINDING_CREATE_PARAM, - BN_R_TOO_MANY_ITERATIONS); - goto err; - } - ERR_clear_error(); - } else - goto err; - } else + if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv)) break; + + /* + * this should almost never happen for good RSA keys + */ + if (!rv) + goto err; + + if (retry_counter-- == 0) { + BNerr(BN_F_BN_BLINDING_CREATE_PARAM, BN_R_TOO_MANY_ITERATIONS); + goto err; + } } while (1); if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) { - if (!ret->bn_mod_exp - (ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx)) + if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx)) goto err; } else { if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx)) goto err; } + if (ret->m_ctx != NULL) { + if (!bn_to_mont_fixed_top(ret->Ai, ret->Ai, ret->m_ctx, ctx) + || !bn_to_mont_fixed_top(ret->A, ret->A, ret->m_ctx, ctx)) + goto err; + } + return ret; err: - if (b == NULL && ret != NULL) { + if (b == NULL) { BN_BLINDING_free(ret); ret = NULL; } diff --git a/crypto/bn/bn_const.c b/crypto/bn/bn_const.c index 12c3208c2492..39dd61202ad7 100755 --- a/crypto/bn/bn_const.c +++ b/crypto/bn/bn_const.c @@ -1,7 +1,13 @@ -/* crypto/bn/knownprimes.c */ -/* Insert boilerplate */ +/* + * Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ -#include "bn.h" +#include <openssl/bn.h> /*- * "First Oakley Default Group" from RFC2409, section 6.1. @@ -12,7 +18,7 @@ * RFC2412 specifies a generator of of 22. */ -BIGNUM *get_rfc2409_prime_768(BIGNUM *bn) +BIGNUM *BN_get_rfc2409_prime_768(BIGNUM *bn) { static const unsigned char RFC2409_PRIME_768[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -40,7 +46,7 @@ BIGNUM *get_rfc2409_prime_768(BIGNUM *bn) * RFC2412 specifies a generator of 22. */ -BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn) +BIGNUM *BN_get_rfc2409_prime_1024(BIGNUM *bn) { static const unsigned char RFC2409_PRIME_1024[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -72,7 +78,7 @@ BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn) * RFC2312 specifies a generator of 22. */ -BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_1536[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -111,7 +117,7 @@ BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn) * RFC3526 specifies a generator of 2. */ -BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_2048(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_2048[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -158,7 +164,7 @@ BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn) * RFC3526 specifies a generator of 2. */ -BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_3072(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_3072[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -221,7 +227,7 @@ BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn) * RFC3526 specifies a generator of 2. */ -BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_4096(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_4096[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -300,7 +306,7 @@ BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn) * RFC3526 specifies a generator of 2. */ -BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_6144(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_6144[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -411,7 +417,7 @@ BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn) * RFC3526 specifies a generator of 2. */ -BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn) +BIGNUM *BN_get_rfc3526_prime_8192(BIGNUM *bn) { static const unsigned char RFC3526_PRIME_8192[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, diff --git a/crypto/bn/bn_ctx.c b/crypto/bn/bn_ctx.c index 526c6a046d16..aa08b31a34bb 100644 --- a/crypto/bn/bn_ctx.c +++ b/crypto/bn/bn_ctx.c @@ -1,69 +1,13 @@ -/* crypto/bn/bn_ctx.c */ -/* Written by Ulf Moeller for the OpenSSL project. */ -/* ==================================================================== - * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#if !defined(BN_CTX_DEBUG) && !defined(BN_DEBUG) -# ifndef NDEBUG -# define NDEBUG -# endif -#endif - -#include <stdio.h> -#include <assert.h> - -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" /*- @@ -104,10 +48,7 @@ typedef struct bignum_pool { } BN_POOL; static void BN_POOL_init(BN_POOL *); static void BN_POOL_finish(BN_POOL *); -#ifndef OPENSSL_NO_DEPRECATED -static void BN_POOL_reset(BN_POOL *); -#endif -static BIGNUM *BN_POOL_get(BN_POOL *); +static BIGNUM *BN_POOL_get(BN_POOL *, int); static void BN_POOL_release(BN_POOL *, unsigned int); /************/ @@ -123,9 +64,6 @@ typedef struct bignum_ctx_stack { } BN_STACK; static void BN_STACK_init(BN_STACK *); static void BN_STACK_finish(BN_STACK *); -#ifndef OPENSSL_NO_DEPRECATED -static void BN_STACK_reset(BN_STACK *); -#endif static int BN_STACK_push(BN_STACK *, unsigned int); static unsigned int BN_STACK_pop(BN_STACK *); @@ -145,6 +83,8 @@ struct bignum_ctx { int err_stack; /* Block "gets" until an "end" (compatibility behaviour) */ int too_many; + /* Flags. */ + int flags; }; /* Enable this to find BN_CTX bugs */ @@ -190,40 +130,27 @@ static void ctxdbg(BN_CTX *ctx) # define CTXDBG_RET(ctx,ret) #endif -/* - * This function is an evil legacy and should not be used. This - * implementation is WYSIWYG, though I've done my best. - */ -#ifndef OPENSSL_NO_DEPRECATED -void BN_CTX_init(BN_CTX *ctx) -{ - /* - * Assume the caller obtained the context via BN_CTX_new() and so is - * trying to reset it for use. Nothing else makes sense, least of all - * binary compatibility from a time when they could declare a static - * variable. - */ - BN_POOL_reset(&ctx->pool); - BN_STACK_reset(&ctx->stack); - ctx->used = 0; - ctx->err_stack = 0; - ctx->too_many = 0; -} -#endif BN_CTX *BN_CTX_new(void) { - BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX)); - if (!ret) { + BN_CTX *ret; + + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { BNerr(BN_F_BN_CTX_NEW, ERR_R_MALLOC_FAILURE); return NULL; } /* Initialise the structure */ BN_POOL_init(&ret->pool); BN_STACK_init(&ret->stack); - ret->used = 0; - ret->err_stack = 0; - ret->too_many = 0; + return ret; +} + +BN_CTX *BN_CTX_secure_new(void) +{ + BN_CTX *ret = BN_CTX_new(); + + if (ret != NULL) + ret->flags = BN_FLG_SECURE; return ret; } @@ -285,10 +212,11 @@ void BN_CTX_end(BN_CTX *ctx) BIGNUM *BN_CTX_get(BN_CTX *ctx) { BIGNUM *ret; + CTXDBG_ENTRY("BN_CTX_get", ctx); if (ctx->err_stack || ctx->too_many) return NULL; - if ((ret = BN_POOL_get(&ctx->pool)) == NULL) { + if ((ret = BN_POOL_get(&ctx->pool, ctx->flags)) == NULL) { /* * Setting too_many prevents repeated "get" attempts from cluttering * the error stack. @@ -316,32 +244,26 @@ static void BN_STACK_init(BN_STACK *st) static void BN_STACK_finish(BN_STACK *st) { - if (st->size) - OPENSSL_free(st->indexes); + OPENSSL_free(st->indexes); + st->indexes = NULL; } -#ifndef OPENSSL_NO_DEPRECATED -static void BN_STACK_reset(BN_STACK *st) -{ - st->depth = 0; -} -#endif static int BN_STACK_push(BN_STACK *st, unsigned int idx) { - if (st->depth == st->size) + if (st->depth == st->size) { /* Need to expand */ - { - unsigned int newsize = (st->size ? - (st->size * 3 / 2) : BN_CTX_START_FRAMES); - unsigned int *newitems = OPENSSL_malloc(newsize * - sizeof(unsigned int)); - if (!newitems) + unsigned int newsize = + st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES; + unsigned int *newitems; + + if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) { + BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE); return 0; + } if (st->depth) - memcpy(newitems, st->indexes, st->depth * sizeof(unsigned int)); - if (st->size) - OPENSSL_free(st->indexes); + memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth); + OPENSSL_free(st->indexes); st->indexes = newitems; st->size = newsize; } @@ -366,55 +288,42 @@ static void BN_POOL_init(BN_POOL *p) static void BN_POOL_finish(BN_POOL *p) { + unsigned int loop; + BIGNUM *bn; + while (p->head) { - unsigned int loop = 0; - BIGNUM *bn = p->head->vals; - while (loop++ < BN_CTX_POOL_SIZE) { + for (loop = 0, bn = p->head->vals; loop++ < BN_CTX_POOL_SIZE; bn++) if (bn->d) BN_clear_free(bn); - bn++; - } p->current = p->head->next; OPENSSL_free(p->head); p->head = p->current; } } -#ifndef OPENSSL_NO_DEPRECATED -static void BN_POOL_reset(BN_POOL *p) -{ - BN_POOL_ITEM *item = p->head; - while (item) { - unsigned int loop = 0; - BIGNUM *bn = item->vals; - while (loop++ < BN_CTX_POOL_SIZE) { - if (bn->d) - BN_clear(bn); - bn++; - } - item = item->next; - } - p->current = p->head; - p->used = 0; -} -#endif -static BIGNUM *BN_POOL_get(BN_POOL *p) +static BIGNUM *BN_POOL_get(BN_POOL *p, int flag) { + BIGNUM *bn; + unsigned int loop; + + /* Full; allocate a new pool item and link it in. */ if (p->used == p->size) { - BIGNUM *bn; - unsigned int loop = 0; - BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM)); - if (!item) + BN_POOL_ITEM *item; + + if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) { + BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE); return NULL; - /* Initialise the structure */ - bn = item->vals; - while (loop++ < BN_CTX_POOL_SIZE) - BN_init(bn++); + } + for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) { + bn_init(bn); + if ((flag & BN_FLG_SECURE) != 0) + BN_set_flags(bn, BN_FLG_SECURE); + } item->prev = p->tail; item->next = NULL; - /* Link it in */ - if (!p->head) + + if (p->head == NULL) p->head = p->current = p->tail = item; else { p->tail->next = item; @@ -426,6 +335,7 @@ static BIGNUM *BN_POOL_get(BN_POOL *p) /* Return the first bignum from the new pool */ return item->vals; } + if (!p->used) p->current = p->head; else if ((p->used % BN_CTX_POOL_SIZE) == 0) @@ -436,10 +346,11 @@ static BIGNUM *BN_POOL_get(BN_POOL *p) static void BN_POOL_release(BN_POOL *p, unsigned int num) { unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE; + p->used -= num; while (num--) { bn_check_top(p->current->vals + offset); - if (!offset) { + if (offset == 0) { offset = BN_CTX_POOL_SIZE - 1; p->current = p->current->prev; } else diff --git a/crypto/bn/bn_depr.c b/crypto/bn/bn_depr.c index 34895f598268..7d89214b1c16 100644 --- a/crypto/bn/bn_depr.c +++ b/crypto/bn/bn_depr.c @@ -1,56 +1,10 @@ -/* crypto/bn/bn_depr.c */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ /* @@ -58,22 +12,22 @@ * slurp this code if applications are using them directly. */ -#include <stdio.h> -#include <time.h> -#include "cryptlib.h" -#include "bn_lcl.h" -#include <openssl/rand.h> +#include <openssl/opensslconf.h> +#if OPENSSL_API_COMPAT >= 0x00908000L +NON_EMPTY_TRANSLATION_UNIT +#else -static void *dummy = &dummy; +# include <stdio.h> +# include <time.h> +# include "internal/cryptlib.h" +# include "bn_lcl.h" -#ifndef OPENSSL_NO_DEPRECATED BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, const BIGNUM *add, const BIGNUM *rem, void (*callback) (int, int, void *), void *cb_arg) { BN_GENCB cb; BIGNUM *rnd = NULL; - int found = 0; BN_GENCB_set_old(&cb, callback, cb_arg); @@ -86,11 +40,10 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, goto err; /* we have a prime :-) */ - found = 1; + return ret; err: - if (!found && (ret == NULL) && (rnd != NULL)) - BN_free(rnd); - return (found ? rnd : NULL); + BN_free(rnd); + return NULL; } int BN_is_prime(const BIGNUM *a, int checks, diff --git a/crypto/bn/bn_dh.c b/crypto/bn/bn_dh.c new file mode 100644 index 000000000000..38acdee234d0 --- /dev/null +++ b/crypto/bn/bn_dh.c @@ -0,0 +1,512 @@ +/* + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "bn_lcl.h" +#include "internal/nelem.h" + +#ifndef OPENSSL_NO_DH +#include <openssl/dh.h> +#include "internal/bn_dh.h" +/* DH parameters from RFC5114 */ + +# if BN_BITS2 == 64 +static const BN_ULONG dh1024_160_p[] = { + 0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL, + 0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL, + 0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL, + 0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL, + 0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL, + 0xB10B8F96A080E01DULL +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL, + 0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL, + 0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL, + 0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL, + 0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL, + 0xA4D1CBD5C3FD3412ULL +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL +}; + +static const BN_ULONG dh2048_224_p[] = { + 0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL, + 0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL, + 0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL, + 0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL, + 0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL, + 0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL, + 0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL, + 0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL, + 0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL, + 0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL, + 0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL, + 0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL, + 0x566AFBB48D6C0191ULL, 0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL, + 0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL, + 0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL, + 0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL, + 0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL, + 0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL, + 0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL, + 0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL, + 0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL, + 0x00000000801C0D34ULL +}; + +static const BN_ULONG dh2048_256_p[] = { + 0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL, + 0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL, + 0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL, + 0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL, + 0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL, + 0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL, + 0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL, + 0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL, + 0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL, + 0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL, + 0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL, + 0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL, + 0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL, + 0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL, + 0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL, + 0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL, + 0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL, + 0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL, + 0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL, + 0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL, + 0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL +}; + +static const BN_ULONG dh2048_256_q[] = { + 0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL, + 0x8CF83642A709A097ULL +}; + +/* Primes from RFC 7919 */ +static const BN_ULONG ffdhe2048_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL, + 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, + 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, + 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, + 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, + 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, + 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, + 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, + 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, + 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, + 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe3072_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL, + 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, + 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, + 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, + 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, + 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, + 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, + 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, + 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, + 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, + 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, + 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, + 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, + 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, + 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, + 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe4096_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL, + 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, + 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, + 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, + 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, + 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, + 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, + 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, + 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, + 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, + 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, + 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, + 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, + 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, + 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, + 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, + 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, + 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, + 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, + 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, + 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, + 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe6144_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL, + 0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, + 0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, + 0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, + 0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, + 0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, + 0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, + 0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, + 0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, + 0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, + 0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, + 0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, + 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, + 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, + 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, + 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, + 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, + 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, + 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, + 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, + 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, + 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, + 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, + 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, + 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, + 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, + 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, + 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, + 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, + 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, + 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, + 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe8192_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL, + 0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL, + 0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL, + 0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL, + 0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL, + 0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL, + 0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL, + 0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL, + 0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL, + 0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL, + 0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL, + 0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL, + 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL, + 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL, + 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL, + 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL, + 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL, + 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL, + 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL, + 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL, + 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL, + 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL, + 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, + 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, + 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, + 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, + 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, + 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, + 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, + 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, + 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, + 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, + 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, + 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, + 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, + 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, + 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, + 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, + 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, + 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, + 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, + 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, + 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +# elif BN_BITS2 == 32 + +static const BN_ULONG dh1024_160_p[] = { + 0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF, + 0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA, + 0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C, + 0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286, + 0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E, + 0xA080E01D, 0xB10B8F96 +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59, + 0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08, + 0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F, + 0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E, + 0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442, + 0xC3FD3412, 0xA4D1CBD5 +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87 +}; + +static const BN_ULONG dh2048_224_p[] = { + 0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2, + 0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E, + 0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1, + 0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A, + 0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714, + 0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0, + 0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0, + 0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152, + 0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40, + 0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5, + 0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1, + 0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE, + 0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F, + 0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137, + 0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3, + 0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2, + 0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0, + 0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001, + 0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01, + 0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B, + 0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE, + 0x801C0D34 +}; + +static const BN_ULONG dh2048_256_p[] = { + 0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227, + 0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A, + 0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79, + 0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5, + 0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267, + 0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF, + 0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF, + 0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64, + 0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45, + 0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608, + 0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148, + 0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428, + 0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15, + 0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73, + 0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1, + 0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982, + 0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5, + 0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8, + 0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A, + 0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F, + 0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B +}; + +static const BN_ULONG dh2048_256_q[] = { + 0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976, + 0xA709A097, 0x8CF83642 +}; + +/* Primes from RFC 7919 */ + +static const BN_ULONG ffdhe2048_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, + 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, + 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, + 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, + 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, + 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, + 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, + 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, + 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, + 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, + 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe3072_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, + 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, + 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, + 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, + 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, + 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, + 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, + 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, + 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, + 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, + 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, + 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, + 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, + 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, + 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, + 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe4096_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851, + 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, + 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, + 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, + 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, + 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, + 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, + 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, + 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, + 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, + 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, + 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, + 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, + 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, + 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, + 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, + 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, + 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, + 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, + 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, + 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, + 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe6144_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D, + 0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, + 0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, + 0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, + 0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, + 0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, + 0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, + 0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, + 0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, + 0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, + 0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, + 0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, + 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, + 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, + 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, + 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, + 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, + 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, + 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, + 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, + 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, + 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, + 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, + 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, + 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, + 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, + 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, + 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, + 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, + 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, + 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, + 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe8192_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94, + 0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5, + 0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB, + 0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A, + 0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD, + 0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C, + 0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C, + 0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7, + 0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74, + 0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69, + 0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C, + 0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526, + 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810, + 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D, + 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B, + 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023, + 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA, + 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092, + 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB, + 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33, + 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442, + 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E, + 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, + 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, + 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, + 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, + 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, + 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, + 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, + 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, + 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, + 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, + 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, + 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, + 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, + 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, + 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, + 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, + 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, + 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, + 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, + 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, + 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +# else +# error "unsupported BN_BITS2" +# endif + +/* Macro to make a BIGNUM from static data */ + +# define make_dh_bn(x) extern const BIGNUM _bignum_##x; \ + const BIGNUM _bignum_##x = { (BN_ULONG *) x, \ + OSSL_NELEM(x),\ + OSSL_NELEM(x),\ + 0, BN_FLG_STATIC_DATA }; + +static const BN_ULONG value_2 = 2; + +const BIGNUM _bignum_const_2 = + { (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA }; + +make_dh_bn(dh1024_160_p) +make_dh_bn(dh1024_160_g) +make_dh_bn(dh1024_160_q) +make_dh_bn(dh2048_224_p) +make_dh_bn(dh2048_224_g) +make_dh_bn(dh2048_224_q) +make_dh_bn(dh2048_256_p) +make_dh_bn(dh2048_256_g) +make_dh_bn(dh2048_256_q) + +make_dh_bn(ffdhe2048_p) +make_dh_bn(ffdhe3072_p) +make_dh_bn(ffdhe4096_p) +make_dh_bn(ffdhe6144_p) +make_dh_bn(ffdhe8192_p) + + +#endif diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c index bc37671cf138..70add10c7d6c 100644 --- a/crypto/bn/bn_div.c +++ b/crypto/bn/bn_div.c @@ -1,64 +1,14 @@ -/* crypto/bn/bn_div.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> #include <openssl/bn.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" /* The old slow way */ @@ -74,17 +24,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, bn_check_top(d); if (BN_is_zero(d)) { BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return (0); + return 0; } if (BN_ucmp(m, d) < 0) { if (rem != NULL) { if (BN_copy(rem, m) == NULL) - return (0); + return 0; } if (dv != NULL) BN_zero(dv); - return (1); + return 1; } BN_CTX_start(ctx); @@ -131,7 +81,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, ret = 1; end: BN_CTX_end(ctx); - return (ret); + return ret; } #else @@ -147,8 +97,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, * understand why...); * - divl doesn't only calculate quotient, but also leaves * remainder in %edx which we can definitely use here:-) - * - * <appro@fy.chalmers.se> */ # undef bn_div_words # define bn_div_words(n0,n1,d0) \ @@ -163,7 +111,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) /* * Same story here, but it's 128-bit by 64-bit division. Wow! - * <appro@fy.chalmers.se> */ # undef bn_div_words # define bn_div_words(n0,n1,d0) \ @@ -180,7 +127,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, # endif /* OPENSSL_NO_ASM */ /*- - * BN_div computes dv := num / divisor, rounding towards + * BN_div computes dv := num / divisor, rounding towards * zero, and sets up rm such that dv*divisor + rm = num holds. * Thus: * dv->neg == num->neg ^ divisor->neg (unless the result is zero) @@ -227,28 +174,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (BN_is_zero(divisor)) { BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return (0); + return 0; } if (!no_branch && BN_ucmp(num, divisor) < 0) { if (rm != NULL) { if (BN_copy(rm, num) == NULL) - return (0); + return 0; } if (dv != NULL) BN_zero(dv); - return (1); + return 1; } BN_CTX_start(ctx); + res = (dv == NULL) ? BN_CTX_get(ctx) : dv; tmp = BN_CTX_get(ctx); snum = BN_CTX_get(ctx); sdiv = BN_CTX_get(ctx); - if (dv == NULL) - res = BN_CTX_get(ctx); - else - res = dv; - if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) + if (sdiv == NULL) goto err; /* First we normalise the numbers */ @@ -290,6 +234,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, wnum.neg = 0; wnum.d = &(snum->d[loop]); wnum.top = div_n; + wnum.flags = BN_FLG_STATIC_DATA; /* * only needed when BN_ucmp messes up the values between top and max */ @@ -304,9 +249,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, wnump = &(snum->d[num_n - 1]); /* Setup to 'res' */ - res->neg = (num->neg ^ divisor->neg); if (!bn_wexpand(res, (loop + 1))) goto err; + res->neg = (num->neg ^ divisor->neg); res->top = loop - no_branch; resp = &(res->d[loop - 1]); @@ -328,6 +273,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, res->top--; } + /* Increase the resp pointer so that we never create an invalid pointer. */ + resp++; + /* * if res->top == 0 then clear the neg value otherwise decrease the resp * pointer @@ -337,7 +285,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, else resp--; - for (i = 0; i < loop - 1; i++, wnump--, resp--) { + for (i = 0; i < loop - 1; i++, wnump--) { BN_ULONG q, l0; /* * the first part of the loop uses the top two words of snum and sdiv @@ -362,10 +310,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0); # else q = bn_div_words(n0, n1, d0); -# ifdef BN_DEBUG_LEVITTE - fprintf(stderr, "DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ -X) -> 0x%08X\n", n0, n1, d0, q); -# endif # endif # ifndef REMAINDER_IS_ALREADY_CALCULATED @@ -390,10 +334,6 @@ X) -> 0x%08X\n", n0, n1, d0, q); BN_ULONG t2l, t2h; q = bn_div_words(n0, n1, d0); -# ifdef BN_DEBUG_LEVITTE - fprintf(stderr, "DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\ -X) -> 0x%08X\n", n0, n1, d0, q); -# endif # ifndef REMAINDER_IS_ALREADY_CALCULATED rem = (n1 - q * d0) & BN_MASK2; # endif @@ -451,6 +391,7 @@ X) -> 0x%08X\n", n0, n1, d0, q); (*wnump)++; } /* store part of the result */ + resp--; *resp = q; } bn_correct_top(snum); @@ -468,10 +409,10 @@ X) -> 0x%08X\n", n0, n1, d0, q); if (no_branch) bn_correct_top(res); BN_CTX_end(ctx); - return (1); + return 1; err: bn_check_top(rm); BN_CTX_end(ctx); - return (0); + return 0; } #endif diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c index e7a703826ee5..dd87c152cf37 100644 --- a/crypto/bn/bn_err.c +++ b/crypto/bn/bn_err.c @@ -1,154 +1,118 @@ -/* crypto/bn/bn_err.c */ -/* ==================================================================== - * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - /* - * NOTE: this file was auto generated by the mkerr.pl script: any changes - * made to it will be overwritten when the script next updates this file, - * only reason strings will be preserved. + * Generated by util/mkerr.pl DO NOT EDIT + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> #include <openssl/err.h> -#include <openssl/bn.h> +#include <openssl/bnerr.h> -/* BEGIN ERROR CODES */ #ifndef OPENSSL_NO_ERR -# define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0) -# define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason) - -static ERR_STRING_DATA BN_str_functs[] = { - {ERR_FUNC(BN_F_BNRAND), "BNRAND"}, - {ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"}, - {ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"}, - {ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"}, - {ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"}, - {ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"}, - {ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"}, - {ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"}, - {ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"}, - {ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"}, - {ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"}, - {ERR_FUNC(BN_F_BN_DIV), "BN_div"}, - {ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"}, - {ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"}, - {ERR_FUNC(BN_F_BN_EXP), "BN_exp"}, - {ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"}, - {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"}, - {ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"}, - {ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"}, - {ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"}, - {ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"}, - {ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"}, - {ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"}, - {ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"}, - {ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"}, - {ERR_FUNC(BN_F_BN_NEW), "BN_new"}, - {ERR_FUNC(BN_F_BN_RAND), "BN_rand"}, - {ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"}, - {ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"}, - {ERR_FUNC(BN_F_BN_USUB), "BN_usub"}, +static const ERR_STRING_DATA BN_str_functs[] = { + {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0), + "BN_BLINDING_convert_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0), + "BN_BLINDING_create_param"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0), + "BN_BLINDING_invert_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0), + "BN_generate_dsa_nonce"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0), + "BN_generate_prime_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0), + "BN_GF2m_mod_solve_quad"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0), + "BN_GF2m_mod_solve_quad_arr"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0), + "BN_mod_exp_mont_consttime"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0), + "BN_mod_exp_mont_word"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0), + "BN_mod_inverse_no_branch"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"}, {0, NULL} }; -static ERR_STRING_DATA BN_str_reasons[] = { - {ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"}, - {ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"}, - {ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"}, - {ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"}, - {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"}, - {ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"}, - {ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"}, - {ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA), - "expand on static bignum data"}, - {ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"}, - {ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"}, - {ERR_REASON(BN_R_INVALID_RANGE), "invalid range"}, - {ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"}, - {ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"}, - {ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"}, - {ERR_REASON(BN_R_NO_INVERSE), "no inverse"}, - {ERR_REASON(BN_R_NO_SOLUTION), "no solution"}, - {ERR_REASON(BN_R_P_IS_NOT_PRIME), "p is not prime"}, - {ERR_REASON(BN_R_TOO_MANY_ITERATIONS), "too many iterations"}, - {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES), - "too many temporary variables"}, +static const ERR_STRING_DATA BN_str_reasons[] = { + {ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS), + "called with even modulus"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA), + "expand on static bignum data"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE), + "private key too large"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES), + "too many temporary variables"}, {0, NULL} }; #endif -void ERR_load_BN_strings(void) +int ERR_load_BN_strings(void) { #ifndef OPENSSL_NO_ERR - if (ERR_func_error_string(BN_str_functs[0].error) == NULL) { - ERR_load_strings(0, BN_str_functs); - ERR_load_strings(0, BN_str_reasons); + ERR_load_strings_const(BN_str_functs); + ERR_load_strings_const(BN_str_reasons); } #endif + return 1; } diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index 40115fc72052..2c92d7eac9d5 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -1,116 +1,14 @@ -/* crypto/bn/bn_exp.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" -#include "constant_time_locl.h" +#include "internal/cryptlib.h" +#include "internal/constant_time_locl.h" #include "bn_lcl.h" #include <stdlib.h> @@ -153,10 +51,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } BN_CTX_start(ctx); - if ((r == a) || (r == p)) - rr = BN_CTX_get(ctx); - else - rr = r; + rr = ((r == a) || (r == p)) ? BN_CTX_get(ctx) : r; v = BN_CTX_get(ctx); if (rr == NULL || v == NULL) goto err; @@ -188,7 +83,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, @@ -201,7 +96,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, bn_check_top(m); /*- - * For even modulus m = 2^k*m_odd, it might make sense to compute + * For even modulus m = 2^k*m_odd, it might make sense to compute * a^p mod m_odd and a^p mod 2^k separately (with Montgomery * exponentiation for the odd part), using appropriate exponent * reductions, and combine the results using the CRT. @@ -236,13 +131,6 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, #define RECP_MUL_MOD #ifdef MONT_MUL_MOD - /* - * I have finally been able to take out this pre-condition of the top bit - * being set. It was caused by an error in BN_div with negatives. There - * was also another problem when for a^b%m a >= m. eay 07-May-97 - */ - /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ - if (BN_is_odd(m)) { # ifdef MONT_EXP_WORD if (a->top == 1 && !a->neg @@ -267,7 +155,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, #endif bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, @@ -290,8 +178,8 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, bits = BN_num_bits(p); if (bits == 0) { - /* x**0 mod 1 is still zero. */ - if (BN_is_one(m)) { + /* x**0 mod 1, or x**0 mod -1 is still zero. */ + if (BN_abs_is_word(m, 1)) { ret = 1; BN_zero(r); } else { @@ -303,7 +191,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_start(ctx); aa = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!aa || !val[0]) + if (val[0] == NULL) goto err; BN_RECP_CTX_init(&recp); @@ -402,7 +290,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_end(ctx); BN_RECP_CTX_free(&recp); bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, @@ -428,12 +316,12 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } bits = BN_num_bits(p); if (bits == 0) { - /* x**0 mod 1 is still zero. */ - if (BN_is_one(m)) { + /* x**0 mod 1, or x**0 mod -1 is still zero. */ + if (BN_abs_is_word(m, 1)) { ret = 1; BN_zero(rr); } else { @@ -446,7 +334,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!d || !r || !val[0]) + if (val[0] == NULL) goto err; /* @@ -468,22 +356,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, aa = val[0]; } else aa = a; - if (BN_is_zero(aa)) { - BN_zero(rr); - ret = 1; - goto err; - } - if (!BN_to_montgomery(val[0], aa, mont, ctx)) + if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx)) goto err; /* 1 */ window = BN_window_bits_for_exponent_size(bits); if (window > 1) { - if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) + if (!bn_mul_mont_fixed_top(d, val[0], val[0], mont, ctx)) goto err; /* 2 */ j = 1 << (window - 1); for (i = 1; i < j; i++) { if (((val[i] = BN_CTX_get(ctx)) == NULL) || - !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) + !bn_mul_mont_fixed_top(val[i], val[i - 1], d, mont, ctx)) goto err; } } @@ -505,19 +388,15 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, for (i = 1; i < j; i++) r->d[i] = (~m->d[i]) & BN_MASK2; r->top = j; - /* - * Upper words will be zero if the corresponding words of 'm' were - * 0xfff[...], so decrement r->top accordingly. - */ - bn_correct_top(r); + r->flags |= BN_FLG_FIXED_TOP; } else #endif - if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) + if (!bn_to_mont_fixed_top(r, BN_value_one(), mont, ctx)) goto err; for (;;) { if (BN_is_bit_set(p, wstart) == 0) { if (!start) { - if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) goto err; } if (wstart == 0) @@ -548,12 +427,12 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, /* add the 'bytes above' */ if (!start) for (i = 0; i < j; i++) { - if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) goto err; } /* wvalue will be an odd number < 2^window */ - if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, val[wvalue >> 1], mont, ctx)) goto err; /* move the 'window' down further */ @@ -563,6 +442,11 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (wstart < 0) break; } + /* + * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery + * removes padding [if any] and makes return value suitable for public + * API consumer. + */ #if defined(SPARC_T4_MONT) if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { j = mont->N.top; /* borrow j */ @@ -578,14 +462,13 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; ret = 1; err: - if ((in_mont == NULL) && (mont != NULL)) + if (in_mont == NULL) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } -#if defined(SPARC_T4_MONT) static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { BN_ULONG ret = 0; @@ -604,7 +487,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) return ret & BN_MASK2; } -#endif /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific @@ -637,6 +519,14 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, { int i, j; int width = 1 << window; + /* + * We declare table 'volatile' in order to discourage compiler + * from reordering loads from the table. Concern is that if + * reordered in specific manner loads might give away the + * information we are trying to conceal. Some would argue that + * compiler can reorder them anyway, but it can as well be + * argued that doing so would be violation of standard... + */ volatile BN_ULONG *table = (volatile BN_ULONG *)buf; if (bn_wexpand(b, top) == NULL) @@ -681,7 +571,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, } b->top = top; - bn_correct_top(b); + b->flags |= BN_FLG_FIXED_TOP; return 1; } @@ -703,7 +593,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i, bits, ret = 0, window, wvalue; + int i, bits, ret = 0, window, wvalue, wmask, window0; int top; BN_MONT_CTX *mont = NULL; @@ -722,7 +612,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } top = m->top; @@ -733,8 +623,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, */ bits = p->top * BN_BITS2; if (bits == 0) { - /* x**0 mod 1 is still zero. */ - if (BN_is_one(m)) { + /* x**0 mod 1, or x**0 mod -1 is still zero. */ + if (BN_abs_is_word(m, 1)) { ret = 1; BN_zero(rr); } else { @@ -759,31 +649,33 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } #ifdef RSAZ_ENABLED - /* - * If the size of the operands allow it, perform the optimized - * RSAZ exponentiation. For further information see - * crypto/bn/rsaz_exp.c and accompanying assembly modules. - */ - if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) - && rsaz_avx2_eligible()) { - if (NULL == bn_wexpand(rr, 16)) + if (!a->neg) { + /* + * If the size of the operands allow it, perform the optimized + * RSAZ exponentiation. For further information see + * crypto/bn/rsaz_exp.c and accompanying assembly modules. + */ + if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) + && rsaz_avx2_eligible()) { + if (NULL == bn_wexpand(rr, 16)) + goto err; + RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, + mont->n0[0]); + rr->top = 16; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; goto err; - RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, - mont->n0[0]); - rr->top = 16; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; - goto err; - } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { - if (NULL == bn_wexpand(rr, 8)) + } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { + if (NULL == bn_wexpand(rr, 8)) + goto err; + RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); + rr->top = 8; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; goto err; - RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); - rr->top = 8; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; - goto err; + } } #endif @@ -821,8 +713,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, else #endif if ((powerbufFree = - (unsigned char *)OPENSSL_malloc(powerbufLen + - MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) + OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) goto err; @@ -852,16 +743,16 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, tmp.top = top; } else #endif - if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) + if (!bn_to_mont_fixed_top(&tmp, BN_value_one(), mont, ctx)) goto err; /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a, m) >= 0) { - if (!BN_mod(&am, a, m, ctx)) + if (!BN_nnmod(&am, a, m, ctx)) goto err; - if (!BN_to_montgomery(&am, &am, mont, ctx)) + if (!bn_to_mont_fixed_top(&am, &am, mont, ctx)) goto err; - } else if (!BN_to_montgomery(&am, a, mont, ctx)) + } else if (!bn_to_mont_fixed_top(&am, a, mont, ctx)) goto err; #if defined(SPARC_T4_MONT) @@ -955,20 +846,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, top /= 2; bn_flip_t4(np, mont->N.d, top); - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5_t4(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { + while (bits > 0) { if (bits < stride) - stride = bits + 1; + stride = bits; bits -= stride; - wvalue = bn_get_bits(p, bits + 1); + wvalue = bn_get_bits(p, bits); if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) continue; @@ -1076,32 +974,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bn_scatter5(tmp.d, top, powerbuf, i); } # endif - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - if (top & 7) - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - + if (top & 7) { + while (bits > 0) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, - wvalue); + bn_get_bits5(p->d, bits -= 5)); + } } else { - while (bits >= 0) { - wvalue = bn_get_bits5(p->d, bits - 4); - bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + while (bits > 0) { + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, + bn_get_bits5(p->d, bits -= 5)); } } @@ -1128,14 +1030,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, * performance advantage of sqr over mul). */ if (window > 1) { - if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &am, &am, mont, ctx)) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, window)) goto err; for (i = 3; i < numPowers; i++) { /* Calculate a^i = a^(i-1) * a */ - if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &am, &tmp, mont, ctx)) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, window)) @@ -1143,28 +1045,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } - bits--; - for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % window + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, window)) goto err; + wmask = (1 << window) - 1; /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - wvalue = 0; /* The 'value' of the window */ + while (bits > 0) { - /* Scan the window, squaring the result as we go */ - for (i = 0; i < window; i++, bits--) { - if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) + /* Square the result window-size times */ + for (i = 0; i < window; i++) + if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx)) goto err; - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - } /* + * Get a window's worth of bits from the exponent + * This avoids calling BN_is_bit_set for each bit, which + * is not only slower but also makes each bit vulnerable to + * EM (and likely other) side-channel attacks like One&Done + * (for details see "One&Done: A Single-Decryption EM-Based + * Attack on OpenSSL’s Constant-Time Blinded RSA" by M. Alam, + * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and + * M. Prvulovic, in USENIX Security'18) + */ + bits -= window; + wvalue = bn_get_bits(p, bits) & wmask; + /* * Fetch the appropriate pre-computed value from the pre-buf */ if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, @@ -1172,12 +1091,16 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; /* Multiply the result into the intermediate result */ - if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &tmp, &am, mont, ctx)) goto err; } } - /* Convert the final result from montgomery to standard format */ + /* + * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery + * removes padding [if any] and makes return value suitable for public + * API consumer. + */ #if defined(SPARC_T4_MONT) if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { am.d[0] = 1; /* borrow am */ @@ -1191,15 +1114,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; ret = 1; err: - if ((in_mont == NULL) && (mont != NULL)) + if (in_mont == NULL) BN_MONT_CTX_free(mont); if (powerbuf != NULL) { OPENSSL_cleanse(powerbuf, powerbufLen); - if (powerbufFree) - OPENSSL_free(powerbufFree); + OPENSSL_free(powerbufFree); } BN_CTX_end(ctx); - return (ret); + return ret; } int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, @@ -1209,7 +1131,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, int b, bits, ret = 0; int r_is_one; BN_ULONG w, next_w; - BIGNUM *d, *r, *t; + BIGNUM *r, *t; BIGNUM *swap_tmp; #define BN_MOD_MUL_WORD(r, w, m) \ (BN_mul_word(r, (w)) && \ @@ -1240,15 +1162,15 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } if (m->top == 1) a %= m->d[0]; /* make sure that 'a' is reduced */ bits = BN_num_bits(p); if (bits == 0) { - /* x**0 mod 1 is still zero. */ - if (BN_is_one(m)) { + /* x**0 mod 1, or x**0 mod -1 is still zero. */ + if (BN_abs_is_word(m, 1)) { ret = 1; BN_zero(rr); } else { @@ -1263,10 +1185,9 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, } BN_CTX_start(ctx); - d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); t = BN_CTX_get(ctx); - if (d == NULL || r == NULL || t == NULL) + if (t == NULL) goto err; if (in_mont != NULL) @@ -1343,11 +1264,11 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, } ret = 1; err: - if ((in_mont == NULL) && (mont != NULL)) + if (in_mont == NULL) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } /* The old fallback, simple version :-) */ @@ -1369,9 +1290,9 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, } bits = BN_num_bits(p); - if (bits == 0) { - /* x**0 mod 1 is still zero. */ - if (BN_is_one(m)) { + if (bits == 0) { + /* x**0 mod 1, or x**0 mod -1 is still zero. */ + if (BN_abs_is_word(m, 1)) { ret = 1; BN_zero(r); } else { @@ -1383,7 +1304,7 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_start(ctx); d = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!d || !val[0]) + if (val[0] == NULL) goto err; if (!BN_nnmod(val[0], a, m, ctx)) @@ -1468,5 +1389,5 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } diff --git a/crypto/bn/bn_exp2.c b/crypto/bn/bn_exp2.c index 43fd2044c024..082c9286a0f4 100644 --- a/crypto/bn/bn_exp2.c +++ b/crypto/bn/bn_exp2.c @@ -1,116 +1,14 @@ -/* crypto/bn/bn_exp2.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" #define TABLE_SIZE 32 @@ -136,7 +34,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, if (!(m->d[0] & 1)) { BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } bits1 = BN_num_bits(p1); bits2 = BN_num_bits(p2); @@ -152,7 +50,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, r = BN_CTX_get(ctx); val1[0] = BN_CTX_get(ctx); val2[0] = BN_CTX_get(ctx); - if (!d || !r || !val1[0] || !val2[0]) + if (val2[0] == NULL) goto err; if (in_mont != NULL) @@ -295,9 +193,9 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, goto err; ret = 1; err: - if ((in_mont == NULL) && (mont != NULL)) + if (in_mont == NULL) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c index ce59fe701f9d..0091ea4e08a6 100644 --- a/crypto/bn/bn_gcd.c +++ b/crypto/bn/bn_gcd.c @@ -1,115 +1,13 @@ -/* crypto/bn/bn_gcd.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" static BIGNUM *euclid(BIGNUM *a, BIGNUM *b); @@ -125,7 +23,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) BN_CTX_start(ctx); a = BN_CTX_get(ctx); b = BN_CTX_get(ctx); - if (a == NULL || b == NULL) + if (b == NULL) goto err; if (BN_copy(a, in_a) == NULL) @@ -150,7 +48,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) @@ -213,9 +111,9 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) goto err; } bn_check_top(a); - return (a); + return a; err: - return (NULL); + return NULL; } /* solves ax == 1 (mod n) */ @@ -226,10 +124,32 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, BIGNUM *BN_mod_inverse(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) { + BIGNUM *rv; + int noinv; + rv = int_bn_mod_inverse(in, a, n, ctx, &noinv); + if (noinv) + BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE); + return rv; +} + +BIGNUM *int_bn_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, + int *pnoinv) +{ BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL; BIGNUM *ret = NULL; int sign; + /* This is invalid input so we don't worry about constant time here */ + if (BN_abs_is_word(n, 1) || BN_is_zero(n)) { + if (pnoinv != NULL) + *pnoinv = 1; + return NULL; + } + + if (pnoinv != NULL) + *pnoinv = 0; + if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) { return BN_mod_inverse_no_branch(in, a, n, ctx); @@ -276,11 +196,11 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, * sign*Y*a == A (mod |n|). */ - if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048))) { + if (BN_is_odd(n) && (BN_num_bits(n) <= 2048)) { /* * Binary inversion algorithm; requires odd modulus. This is faster * than the general algorithm if the modulus is sufficiently small - * (about 400 .. 500 bits on 32-bit sytems, but much more on 64-bit + * (about 400 .. 500 bits on 32-bit systems, but much more on 64-bit * systems) */ int shift; @@ -364,8 +284,7 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, if (!BN_uadd(Y, Y, X)) goto err; /* - * as above, BN_mod_add_quick(Y, Y, X, n) would slow things - * down + * as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */ if (!BN_usub(A, A, B)) goto err; @@ -435,8 +354,7 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, * (**) sign*Y*a == D*B + M (mod |n|). */ - tmp = A; /* keep the BIGNUM object, the value does not - * matter */ + tmp = A; /* keep the BIGNUM object, the value does not matter */ /* (A, B) := (B, A mod B) ... */ A = B; @@ -457,15 +375,14 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, * i.e. * sign*(Y + D*X)*a == B (mod |n|). * - * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at + * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at * -sign*X*a == B (mod |n|), * sign*Y*a == A (mod |n|). * Note that X and Y stay non-negative all the time. */ /* - * most of the time D is very small, so we can optimize tmp := - * D*X+Y + * most of the time D is very small, so we can optimize tmp := D*X+Y */ if (BN_is_one(D)) { if (!BN_add(tmp, X, Y)) @@ -490,8 +407,7 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, goto err; } - M = Y; /* keep the BIGNUM object, the value does not - * matter */ + M = Y; /* keep the BIGNUM object, the value does not matter */ Y = X; X = tmp; sign = -sign; @@ -522,7 +438,8 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, goto err; } } else { - BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE); + if (pnoinv) + *pnoinv = 1; goto err; } ret = R; @@ -531,7 +448,7 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, BN_free(R); BN_CTX_end(ctx); bn_check_top(ret); - return (ret); + return ret; } /* @@ -543,8 +460,6 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, BN_CTX *ctx) { BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL; - BIGNUM local_A, local_B; - BIGNUM *pA, *pB; BIGNUM *ret = NULL; int sign; @@ -582,11 +497,14 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, * BN_div_no_branch will be called eventually. */ - pB = &local_B; - local_B.flags = 0; - BN_with_flags(pB, B, BN_FLG_CONSTTIME); - if (!BN_nnmod(B, pB, A, ctx)) - goto err; + { + BIGNUM local_B; + bn_init(&local_B); + BN_with_flags(&local_B, B, BN_FLG_CONSTTIME); + if (!BN_nnmod(B, &local_B, A, ctx)) + goto err; + /* Ensure local_B goes out of scope before any further use of B */ + } } sign = -1; /*- @@ -610,13 +528,16 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, * BN_div_no_branch will be called eventually. */ - pA = &local_A; - local_A.flags = 0; - BN_with_flags(pA, A, BN_FLG_CONSTTIME); + { + BIGNUM local_A; + bn_init(&local_A); + BN_with_flags(&local_A, A, BN_FLG_CONSTTIME); - /* (D, M) := (A/B, A%B) ... */ - if (!BN_div(D, M, pA, B, ctx)) - goto err; + /* (D, M) := (A/B, A%B) ... */ + if (!BN_div(D, M, &local_A, B, ctx)) + goto err; + /* Ensure local_A goes out of scope before any further use of A */ + } /*- * Now @@ -647,7 +568,7 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, * i.e. * sign*(Y + D*X)*a == B (mod |n|). * - * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at + * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at * -sign*X*a == B (mod |n|), * sign*Y*a == A (mod |n|). * Note that X and Y stay non-negative all the time. @@ -698,5 +619,5 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, BN_free(R); BN_CTX_end(ctx); bn_check_top(ret); - return (ret); + return ret; } diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c index 2c61da11093f..34d8b69c1ec9 100644 --- a/crypto/bn/bn_gf2m.c +++ b/crypto/bn/bn_gf2m.c @@ -1,98 +1,17 @@ -/* crypto/bn/bn_gf2m.c */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * The Elliptic Curve Public-Key Crypto Library (ECC Code) included - * herein is developed by SUN MICROSYSTEMS, INC., and is contributed - * to the OpenSSL project. - * - * The ECC Code is licensed pursuant to the OpenSSL open source - * license provided below. - * - * In addition, Sun covenants to all licensees who provide a reciprocal - * covenant with respect to their own patents if any, not to sue under - * current and future patent claims necessarily infringed by the making, - * using, practicing, selling, offering for sale and/or otherwise - * disposing of the ECC Code as delivered hereunder (or portions thereof), - * provided that such covenant shall not apply: - * 1) for code that a licensee deletes from the ECC Code; - * 2) separates from the ECC Code; or - * 3) for infringements caused by: - * i) the modification of the ECC Code or - * ii) the combination of the ECC Code with other software or - * devices where such combination causes the infringement. - * - * The software is originally written by Sheueling Chang Shantz and - * Douglas Stebila of Sun Microsystems Laboratories. - * - */ - /* - * NOTE: This file is licensed pursuant to the OpenSSL license below and may - * be modified; but after modifications, the above covenant may no longer - * apply! In such cases, the corresponding paragraph ["In addition, Sun - * covenants ... causes the infringement."] and this note can be edited out; - * but please keep the Sun copyright notice and attribution. - */ - -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). + * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <assert.h> #include <limits.h> #include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" #ifndef OPENSSL_NO_EC2M @@ -103,30 +22,32 @@ */ # define MAX_ITERATIONS 50 -static const BN_ULONG SQR_tb[16] = { 0, 1, 4, 5, 16, 17, 20, 21, - 64, 65, 68, 69, 80, 81, 84, 85 -}; +# define SQR_nibble(w) ((((w) & 8) << 3) \ + | (((w) & 4) << 2) \ + | (((w) & 2) << 1) \ + | ((w) & 1)) + /* Platform-specific macros to accelerate squaring. */ # if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) # define SQR1(w) \ - SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \ - SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \ - SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \ - SQR_tb[(w) >> 36 & 0xF] << 8 | SQR_tb[(w) >> 32 & 0xF] + SQR_nibble((w) >> 60) << 56 | SQR_nibble((w) >> 56) << 48 | \ + SQR_nibble((w) >> 52) << 40 | SQR_nibble((w) >> 48) << 32 | \ + SQR_nibble((w) >> 44) << 24 | SQR_nibble((w) >> 40) << 16 | \ + SQR_nibble((w) >> 36) << 8 | SQR_nibble((w) >> 32) # define SQR0(w) \ - SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \ - SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \ - SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ - SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] + SQR_nibble((w) >> 28) << 56 | SQR_nibble((w) >> 24) << 48 | \ + SQR_nibble((w) >> 20) << 40 | SQR_nibble((w) >> 16) << 32 | \ + SQR_nibble((w) >> 12) << 24 | SQR_nibble((w) >> 8) << 16 | \ + SQR_nibble((w) >> 4) << 8 | SQR_nibble((w) ) # endif # ifdef THIRTY_TWO_BIT # define SQR1(w) \ - SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \ - SQR_tb[(w) >> 20 & 0xF] << 8 | SQR_tb[(w) >> 16 & 0xF] + SQR_nibble((w) >> 28) << 24 | SQR_nibble((w) >> 24) << 16 | \ + SQR_nibble((w) >> 20) << 8 | SQR_nibble((w) >> 16) # define SQR0(w) \ - SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ - SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] + SQR_nibble((w) >> 12) << 24 | SQR_nibble((w) >> 8) << 16 | \ + SQR_nibble((w) >> 4) << 8 | SQR_nibble((w) ) # endif # if !defined(OPENSSL_BN_ASM_GF2m) @@ -472,8 +393,8 @@ int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) int arr[6]; bn_check_top(a); bn_check_top(p); - ret = BN_GF2m_poly2arr(p, arr, sizeof(arr) / sizeof(arr[0])); - if (!ret || ret > (int)(sizeof(arr) / sizeof(arr[0]))) { + ret = BN_GF2m_poly2arr(p, arr, OSSL_NELEM(arr)); + if (!ret || ret > (int)OSSL_NELEM(arr)) { BNerr(BN_F_BN_GF2M_MOD, BN_R_INVALID_LENGTH); return 0; } @@ -550,7 +471,7 @@ int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) + if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -560,8 +481,7 @@ int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx); bn_check_top(r); err: - if (arr) - OPENSSL_free(arr); + OPENSSL_free(arr); return ret; } @@ -609,7 +529,7 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) bn_check_top(a); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) + if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -619,8 +539,7 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx); bn_check_top(r); err: - if (arr) - OPENSSL_free(arr); + OPENSSL_free(arr); return ret; } @@ -630,7 +549,8 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) * Hernandez, J.L., and Menezes, A. "Software Implementation of Elliptic * Curve Cryptography Over Binary Fields". */ -int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) +static int BN_GF2m_mod_inv_vartime(BIGNUM *r, const BIGNUM *a, + const BIGNUM *p, BN_CTX *ctx) { BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp; int ret = 0; @@ -640,13 +560,11 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) BN_CTX_start(ctx); - if ((b = BN_CTX_get(ctx)) == NULL) - goto err; - if ((c = BN_CTX_get(ctx)) == NULL) - goto err; - if ((u = BN_CTX_get(ctx)) == NULL) - goto err; - if ((v = BN_CTX_get(ctx)) == NULL) + b = BN_CTX_get(ctx); + c = BN_CTX_get(ctx); + u = BN_CTX_get(ctx); + v = BN_CTX_get(ctx); + if (v == NULL) goto err; if (!BN_GF2m_mod(u, a, p)) @@ -798,6 +716,46 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) return ret; } +/*- + * Wrapper for BN_GF2m_mod_inv_vartime that blinds the input before calling. + * This is not constant time. + * But it does eliminate first order deduction on the input. + */ +int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) +{ + BIGNUM *b = NULL; + int ret = 0; + + BN_CTX_start(ctx); + if ((b = BN_CTX_get(ctx)) == NULL) + goto err; + + /* generate blinding value */ + do { + if (!BN_priv_rand(b, BN_num_bits(p) - 1, + BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) + goto err; + } while (BN_is_zero(b)); + + /* r := a * b */ + if (!BN_GF2m_mod_mul(r, a, b, p, ctx)) + goto err; + + /* r := 1/(a * b) */ + if (!BN_GF2m_mod_inv_vartime(r, r, p, ctx)) + goto err; + + /* r := b/(a * b) = 1/a */ + if (!BN_GF2m_mod_mul(r, r, b, p, ctx)) + goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + return ret; +} + /* * Invert xx, reduce modulo p, and store the result in r. r could be xx. * This function calls down to the BN_GF2m_mod_inv implementation; this @@ -825,7 +783,6 @@ int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], return ret; } -# ifndef OPENSSL_SUN_GF2M_DIV /* * Divide y by x, reduce modulo p, and store the result in r. r could be x * or y, x could equal y. @@ -856,94 +813,6 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, BN_CTX_end(ctx); return ret; } -# else -/* - * Divide y by x, reduce modulo p, and store the result in r. r could be x - * or y, x could equal y. Uses algorithm Modular_Division_GF(2^m) from - * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to the - * Great Divide". - */ -int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, - const BIGNUM *p, BN_CTX *ctx) -{ - BIGNUM *a, *b, *u, *v; - int ret = 0; - - bn_check_top(y); - bn_check_top(x); - bn_check_top(p); - - BN_CTX_start(ctx); - - a = BN_CTX_get(ctx); - b = BN_CTX_get(ctx); - u = BN_CTX_get(ctx); - v = BN_CTX_get(ctx); - if (v == NULL) - goto err; - - /* reduce x and y mod p */ - if (!BN_GF2m_mod(u, y, p)) - goto err; - if (!BN_GF2m_mod(a, x, p)) - goto err; - if (!BN_copy(b, p)) - goto err; - - while (!BN_is_odd(a)) { - if (!BN_rshift1(a, a)) - goto err; - if (BN_is_odd(u)) - if (!BN_GF2m_add(u, u, p)) - goto err; - if (!BN_rshift1(u, u)) - goto err; - } - - do { - if (BN_GF2m_cmp(b, a) > 0) { - if (!BN_GF2m_add(b, b, a)) - goto err; - if (!BN_GF2m_add(v, v, u)) - goto err; - do { - if (!BN_rshift1(b, b)) - goto err; - if (BN_is_odd(v)) - if (!BN_GF2m_add(v, v, p)) - goto err; - if (!BN_rshift1(v, v)) - goto err; - } while (!BN_is_odd(b)); - } else if (BN_abs_is_word(a, 1)) - break; - else { - if (!BN_GF2m_add(a, a, b)) - goto err; - if (!BN_GF2m_add(u, u, v)) - goto err; - do { - if (!BN_rshift1(a, a)) - goto err; - if (BN_is_odd(u)) - if (!BN_GF2m_add(u, u, p)) - goto err; - if (!BN_rshift1(u, u)) - goto err; - } while (!BN_is_odd(a)); - } - } while (1); - - if (!BN_copy(r, u)) - goto err; - bn_check_top(r); - ret = 1; - - err: - BN_CTX_end(ctx); - return ret; -} -# endif /* * Divide yy by xx, reduce modulo p, and store the result in r. r could be xx @@ -989,7 +858,7 @@ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, bn_check_top(b); if (BN_is_zero(b)) - return (BN_one(r)); + return BN_one(r); if (BN_abs_is_word(b, 1)) return (BN_copy(r, a) != NULL); @@ -1034,7 +903,7 @@ int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) + if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -1044,8 +913,7 @@ int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx); bn_check_top(r); err: - if (arr) - OPENSSL_free(arr); + OPENSSL_free(arr); return ret; } @@ -1094,7 +962,7 @@ int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) int *arr = NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) + if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -1104,8 +972,7 @@ int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx); bn_check_top(r); err: - if (arr) - OPENSSL_free(arr); + OPENSSL_free(arr); return ret; } @@ -1164,7 +1031,7 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], if (tmp == NULL) goto err; do { - if (!BN_rand(rho, p[0], 0, 0)) + if (!BN_priv_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto err; if (!BN_GF2m_mod_arr(rho, rho, p)) goto err; @@ -1225,7 +1092,7 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, int *arr = NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) + if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -1235,8 +1102,7 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx); bn_check_top(r); err: - if (arr) - OPENSSL_free(arr); + OPENSSL_free(arr); return ret; } diff --git a/crypto/bn/bn_intern.c b/crypto/bn/bn_intern.c new file mode 100644 index 000000000000..46bc97575dfe --- /dev/null +++ b/crypto/bn/bn_intern.c @@ -0,0 +1,199 @@ +/* + * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/cryptlib.h" +#include "bn_lcl.h" + +/* + * Determine the modified width-(w+1) Non-Adjacent Form (wNAF) of 'scalar'. + * This is an array r[] of values that are either zero or odd with an + * absolute value less than 2^w satisfying + * scalar = \sum_j r[j]*2^j + * where at most one of any w+1 consecutive digits is non-zero + * with the exception that the most significant digit may be only + * w-1 zeros away from that next non-zero digit. + */ +signed char *bn_compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) +{ + int window_val; + signed char *r = NULL; + int sign = 1; + int bit, next_bit, mask; + size_t len = 0, j; + + if (BN_is_zero(scalar)) { + r = OPENSSL_malloc(1); + if (r == NULL) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE); + goto err; + } + r[0] = 0; + *ret_len = 1; + return r; + } + + if (w <= 0 || w > 7) { /* 'signed char' can represent integers with + * absolute values less than 2^7 */ + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + bit = 1 << w; /* at most 128 */ + next_bit = bit << 1; /* at most 256 */ + mask = next_bit - 1; /* at most 255 */ + + if (BN_is_negative(scalar)) { + sign = -1; + } + + if (scalar->d == NULL || scalar->top == 0) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + + len = BN_num_bits(scalar); + r = OPENSSL_malloc(len + 1); /* + * Modified wNAF may be one digit longer than binary representation + * (*ret_len will be set to the actual length, i.e. at most + * BN_num_bits(scalar) + 1) + */ + if (r == NULL) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE); + goto err; + } + window_val = scalar->d[0] & mask; + j = 0; + while ((window_val != 0) || (j + w + 1 < len)) { /* if j+w+1 >= len, + * window_val will not + * increase */ + int digit = 0; + + /* 0 <= window_val <= 2^(w+1) */ + + if (window_val & 1) { + /* 0 < window_val < 2^(w+1) */ + + if (window_val & bit) { + digit = window_val - next_bit; /* -2^w < digit < 0 */ + +#if 1 /* modified wNAF */ + if (j + w + 1 >= len) { + /* + * Special case for generating modified wNAFs: + * no new bits will be added into window_val, + * so using a positive digit here will decrease + * the total length of the representation + */ + + digit = window_val & (mask >> 1); /* 0 < digit < 2^w */ + } +#endif + } else { + digit = window_val; /* 0 < digit < 2^w */ + } + + if (digit <= -bit || digit >= bit || !(digit & 1)) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + + window_val -= digit; + + /* + * now window_val is 0 or 2^(w+1) in standard wNAF generation; + * for modified window NAFs, it may also be 2^w + */ + if (window_val != 0 && window_val != next_bit + && window_val != bit) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + } + + r[j++] = sign * digit; + + window_val >>= 1; + window_val += bit * BN_is_bit_set(scalar, j + w); + + if (window_val > next_bit) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + } + + if (j > len + 1) { + BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR); + goto err; + } + *ret_len = j; + return r; + + err: + OPENSSL_free(r); + return NULL; +} + +int bn_get_top(const BIGNUM *a) +{ + return a->top; +} + +int bn_get_dmax(const BIGNUM *a) +{ + return a->dmax; +} + +void bn_set_all_zero(BIGNUM *a) +{ + int i; + + for (i = a->top; i < a->dmax; i++) + a->d[i] = 0; +} + +int bn_copy_words(BN_ULONG *out, const BIGNUM *in, int size) +{ + if (in->top > size) + return 0; + + memset(out, 0, sizeof(*out) * size); + if (in->d != NULL) + memcpy(out, in->d, sizeof(*out) * in->top); + return 1; +} + +BN_ULONG *bn_get_words(const BIGNUM *a) +{ + return a->d; +} + +void bn_set_static_words(BIGNUM *a, const BN_ULONG *words, int size) +{ + /* + * |const| qualifier omission is compensated by BN_FLG_STATIC_DATA + * flag, which effectively means "read-only data". + */ + a->d = (BN_ULONG *)words; + a->dmax = a->top = size; + a->neg = 0; + a->flags |= BN_FLG_STATIC_DATA; + bn_correct_top(a); +} + +int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words) +{ + if (bn_wexpand(a, num_words) == NULL) { + BNerr(BN_F_BN_SET_WORDS, ERR_R_MALLOC_FAILURE); + return 0; + } + + memcpy(a->d, words, sizeof(BN_ULONG) * num_words); + a->top = num_words; + bn_correct_top(a); + return 1; +} diff --git a/crypto/bn/bn_kron.c b/crypto/bn/bn_kron.c index 88d731ac75cd..b9bc6cca27fe 100644 --- a/crypto/bn/bn_kron.c +++ b/crypto/bn/bn_kron.c @@ -1,59 +1,13 @@ -/* crypto/bn/bn_kron.c */ -/* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" /* least significant word */ @@ -92,7 +46,7 @@ int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) goto end; /* - * Kronecker symbol, imlemented according to Henri Cohen, + * Kronecker symbol, implemented according to Henri Cohen, * "A Course in Computational Algebraic Number Theory" * (algorithm 1.4.10). */ diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h index 00f4f09945b3..8a36db2e8b67 100644 --- a/crypto/bn/bn_lcl.h +++ b/crypto/bn/bn_lcl.h @@ -1,122 +1,267 @@ -/* crypto/bn/bn_lcl.h */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #ifndef HEADER_BN_LCL_H # define HEADER_BN_LCL_H -# include <openssl/bn.h> +/* + * The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or + * SIXTY_FOUR_BIT in its own environment since it doesn't re-run our + * Configure script and needs to support both 32-bit and 64-bit. + */ +# include <openssl/opensslconf.h> -#ifdef __cplusplus -extern "C" { -#endif +# if !defined(OPENSSL_SYS_UEFI) +# include "internal/bn_conf.h" +# endif + +# include "internal/bn_int.h" + +/* + * These preprocessor symbols control various aspects of the bignum headers + * and library code. They're not defined by any "normal" configuration, as + * they are intended for development and testing purposes. NB: defining all + * three can be useful for debugging application code as well as openssl + * itself. BN_DEBUG - turn on various debugging alterations to the bignum + * code BN_DEBUG_RAND - uses random poisoning of unused words to trip up + * mismanagement of bignum internals. You must also define BN_DEBUG. + */ +/* #define BN_DEBUG */ +/* #define BN_DEBUG_RAND */ + +# ifndef OPENSSL_SMALL_FOOTPRINT +# define BN_MUL_COMBA +# define BN_SQR_COMBA +# define BN_RECURSION +# endif + +/* + * This next option uses the C libraries (2 word)/(1 word) function. If it is + * not defined, I use my C version (which is slower). The reason for this + * flag is that when the particular C compiler library routine is used, and + * the library is linked with a different compiler, the library is missing. + * This mostly happens when the library is built with gcc and then linked + * using normal cc. This would be a common occurrence because gcc normally + * produces code that is 2 times faster than system compilers for the big + * number stuff. For machines with only one compiler (or shared libraries), + * this should be on. Again this in only really a problem on machines using + * "long long's", are 32bit, and are not using my assembler code. + */ +# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \ + defined(OPENSSL_SYS_WIN32) || defined(linux) +# define BN_DIV2W +# endif + +/* + * 64-bit processor with LP64 ABI + */ +# ifdef SIXTY_FOUR_BIT_LONG +# define BN_ULLONG unsigned long long +# define BN_BITS4 32 +# define BN_MASK2 (0xffffffffffffffffL) +# define BN_MASK2l (0xffffffffL) +# define BN_MASK2h (0xffffffff00000000L) +# define BN_MASK2h1 (0xffffffff80000000L) +# define BN_DEC_CONV (10000000000000000000UL) +# define BN_DEC_NUM 19 +# define BN_DEC_FMT1 "%lu" +# define BN_DEC_FMT2 "%019lu" +# endif + +/* + * 64-bit processor other than LP64 ABI + */ +# ifdef SIXTY_FOUR_BIT +# undef BN_LLONG +# undef BN_ULLONG +# define BN_BITS4 32 +# define BN_MASK2 (0xffffffffffffffffLL) +# define BN_MASK2l (0xffffffffL) +# define BN_MASK2h (0xffffffff00000000LL) +# define BN_MASK2h1 (0xffffffff80000000LL) +# define BN_DEC_CONV (10000000000000000000ULL) +# define BN_DEC_NUM 19 +# define BN_DEC_FMT1 "%llu" +# define BN_DEC_FMT2 "%019llu" +# endif + +# ifdef THIRTY_TWO_BIT +# ifdef BN_LLONG +# if defined(_WIN32) && !defined(__GNUC__) +# define BN_ULLONG unsigned __int64 +# else +# define BN_ULLONG unsigned long long +# endif +# endif +# define BN_BITS4 16 +# define BN_MASK2 (0xffffffffL) +# define BN_MASK2l (0xffff) +# define BN_MASK2h1 (0xffff8000L) +# define BN_MASK2h (0xffff0000L) +# define BN_DEC_CONV (1000000000L) +# define BN_DEC_NUM 9 +# define BN_DEC_FMT1 "%u" +# define BN_DEC_FMT2 "%09u" +# endif + + +/*- + * Bignum consistency macros + * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from + * bignum data after direct manipulations on the data. There is also an + * "internal" macro, bn_check_top(), for verifying that there are no leading + * zeroes. Unfortunately, some auditing is required due to the fact that + * bn_fix_top() has become an overabused duct-tape because bignum data is + * occasionally passed around in an inconsistent state. So the following + * changes have been made to sort this out; + * - bn_fix_top()s implementation has been moved to bn_correct_top() + * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and + * bn_check_top() is as before. + * - if BN_DEBUG *is* defined; + * - bn_check_top() tries to pollute unused words even if the bignum 'top' is + * consistent. (ed: only if BN_DEBUG_RAND is defined) + * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything. + * The idea is to have debug builds flag up inconsistent bignums when they + * occur. If that occurs in a bn_fix_top(), we examine the code in question; if + * the use of bn_fix_top() was appropriate (ie. it follows directly after code + * that manipulates the bignum) it is converted to bn_correct_top(), and if it + * was not appropriate, we convert it permanently to bn_check_top() and track + * down the cause of the bug. Eventually, no internal code should be using the + * bn_fix_top() macro. External applications and libraries should try this with + * their own code too, both in terms of building against the openssl headers + * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it + * defined. This not only improves external code, it provides more test + * coverage for openssl's own code. + */ + +# ifdef BN_DEBUG +/* + * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with + * bn_correct_top, in other words such vectors are permitted to have zeros + * in most significant limbs. Such vectors are used internally to achieve + * execution time invariance for critical operations with private keys. + * It's BN_DEBUG-only flag, because user application is not supposed to + * observe it anyway. Moreover, optimizing compiler would actually remove + * all operations manipulating the bit in question in non-BN_DEBUG build. + */ +# define BN_FLG_FIXED_TOP 0x10000 +# ifdef BN_DEBUG_RAND +# define bn_pollute(a) \ + do { \ + const BIGNUM *_bnum1 = (a); \ + if (_bnum1->top < _bnum1->dmax) { \ + unsigned char _tmp_char; \ + /* We cast away const without the compiler knowing, any \ + * *genuinely* constant variables that aren't mutable \ + * wouldn't be constructed with top!=dmax. */ \ + BN_ULONG *_not_const; \ + memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \ + RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\ + memset(_not_const + _bnum1->top, _tmp_char, \ + sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \ + } \ + } while(0) +# else +# define bn_pollute(a) +# endif +# define bn_check_top(a) \ + do { \ + const BIGNUM *_bnum2 = (a); \ + if (_bnum2 != NULL) { \ + int _top = _bnum2->top; \ + (void)ossl_assert((_top == 0 && !_bnum2->neg) || \ + (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \ + || _bnum2->d[_top - 1] != 0))); \ + bn_pollute(_bnum2); \ + } \ + } while(0) + +# define bn_fix_top(a) bn_check_top(a) + +# define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) +# define bn_wcheck_size(bn, words) \ + do { \ + const BIGNUM *_bnum2 = (bn); \ + assert((words) <= (_bnum2)->dmax && \ + (words) >= (_bnum2)->top); \ + /* avoid unused variable warning with NDEBUG */ \ + (void)(_bnum2); \ + } while(0) + +# else /* !BN_DEBUG */ + +# define BN_FLG_FIXED_TOP 0 +# define bn_pollute(a) +# define bn_check_top(a) +# define bn_fix_top(a) bn_correct_top(a) +# define bn_check_size(bn, bits) +# define bn_wcheck_size(bn, words) + +# endif + +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, + BN_ULONG w); +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); +void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); +BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); +BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + int num); +BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + int num); + +struct bignum_st { + BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit + * chunks. */ + int top; /* Index of last used d +1. */ + /* The next are internal book keeping for bn_expand. */ + int dmax; /* Size of the d array. */ + int neg; /* one if the number is negative */ + int flags; +}; + +/* Used for montgomery multiplication */ +struct bn_mont_ctx_st { + int ri; /* number of bits in R */ + BIGNUM RR; /* used to convert to montgomery form, + possibly zero-padded */ + BIGNUM N; /* The modulus */ + BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 (Ni is only + * stored for bignum algorithm) */ + BN_ULONG n0[2]; /* least significant word(s) of Ni; (type + * changed with 0.9.9, was "BN_ULONG n0;" + * before) */ + int flags; +}; + +/* + * Used for reciprocal division/mod functions It cannot be shared between + * threads + */ +struct bn_recp_ctx_st { + BIGNUM N; /* the divisor */ + BIGNUM Nr; /* the reciprocal */ + int num_bits; + int shift; + int flags; +}; + +/* Used for slow "generation" functions. */ +struct bn_gencb_st { + unsigned int ver; /* To handle binary (in)compatibility */ + void *arg; /* callback-specific data */ + union { + /* if (ver==1) - handles old style callbacks */ + void (*cb_1) (int, int, void *); + /* if (ver==2) - new callback style */ + int (*cb_2) (int, int, BN_GENCB *); + } cb; +}; /*- * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions @@ -143,23 +288,11 @@ extern "C" { * (with draws in between). Very small exponents are often selected * with low Hamming weight, so we use w = 1 for b <= 23. */ -# if 1 -# define BN_window_bits_for_exponent_size(b) \ +# define BN_window_bits_for_exponent_size(b) \ ((b) > 671 ? 6 : \ (b) > 239 ? 5 : \ (b) > 79 ? 4 : \ (b) > 23 ? 3 : 1) -# else -/* - * Old SSLeay/OpenSSL table. Maximum window size was 5, so this table differs - * for b==1024; but it coincides for other interesting values (b==160, - * b==512). - */ -# define BN_window_bits_for_exponent_size(b) \ - ((b) > 255 ? 5 : \ - (b) > 127 ? 4 : \ - (b) > 17 ? 3 : 1) -# endif /* * BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache @@ -225,59 +358,58 @@ extern "C" { # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) /* * BN_UMULT_HIGH section. - * - * No, I'm not trying to overwhelm you when stating that the - * product of N-bit numbers is 2*N bits wide:-) No, I don't expect - * you to be impressed when I say that if the compiler doesn't - * support 2*N integer type, then you have to replace every N*N - * multiplication with 4 (N/2)*(N/2) accompanied by some shifts - * and additions which unavoidably results in severe performance - * penalties. Of course provided that the hardware is capable of - * producing 2*N result... That's when you normally start - * considering assembler implementation. However! It should be - * pointed out that some CPUs (most notably Alpha, PowerPC and - * upcoming IA-64 family:-) provide *separate* instruction - * calculating the upper half of the product placing the result - * into a general purpose register. Now *if* the compiler supports - * inline assembler, then it's not impossible to implement the - * "bignum" routines (and have the compiler optimize 'em) - * exhibiting "native" performance in C. That's what BN_UMULT_HIGH - * macro is about:-) - * - * <appro@fy.chalmers.se> + * If the compiler doesn't support 2*N integer type, then you have to + * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some + * shifts and additions which unavoidably results in severe performance + * penalties. Of course provided that the hardware is capable of producing + * 2*N result... That's when you normally start considering assembler + * implementation. However! It should be pointed out that some CPUs (e.g., + * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating + * the upper half of the product placing the result into a general + * purpose register. Now *if* the compiler supports inline assembler, + * then it's not impossible to implement the "bignum" routines (and have + * the compiler optimize 'em) exhibiting "native" performance in C. That's + * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do + * support 2*64 integer type, which is also used here. */ -# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) +# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \ + (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) +# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) +# define BN_UMULT_LOHI(low,high,a,b) ({ \ + __uint128_t ret=(__uint128_t)(a)*(b); \ + (high)=ret>>64; (low)=ret; }) +# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__DECC) # include <c_asm.h> # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) # elif defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("umulh %1,%2,%0" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif /* compiler */ -# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) +# elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("mulhdu %0,%1,%2" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif /* compiler */ # elif (defined(__x86_64) || defined(__x86_64__)) && \ (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret,discard; \ asm ("mulq %3" \ : "=a"(discard),"=d"(ret) \ : "a"(a), "g"(b) \ : "cc"); \ - ret; }) -# define BN_UMULT_LOHI(low,high,a,b) \ + ret; }) +# define BN_UMULT_LOHI(low,high,a,b) \ asm ("mulq %3" \ : "=a"(low),"=d"(high) \ : "a"(a),"g"(b) \ @@ -294,43 +426,29 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, # endif # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) # if defined(__GNUC__) && __GNUC__>=2 -# if __GNUC__>4 || (__GNUC__>=4 && __GNUC_MINOR__>=4) - /* "h" constraint is no more since 4.4 */ -# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) -# define BN_UMULT_LOHI(low,high,a,b) ({ \ - __uint128_t ret=(__uint128_t)(a)*(b); \ - (high)=ret>>64; (low)=ret; }) -# else -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("dmultu %1,%2" \ : "=h"(ret) \ : "r"(a), "r"(b) : "l"); \ ret; }) -# define BN_UMULT_LOHI(low,high,a,b)\ +# define BN_UMULT_LOHI(low,high,a,b) \ asm ("dmultu %2,%3" \ : "=l"(low),"=h"(high) \ : "r"(a), "r"(b)); -# endif # endif # elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("umulh %0,%1,%2" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif # endif /* cpu */ # endif /* OPENSSL_NO_ASM */ -/************************************************************* - * Using the long long type - */ -# define Lw(t) (((BN_ULONG)(t))&BN_MASK2) -# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) - # ifdef BN_DEBUG_RAND # define bn_clear_top2max(a) \ { \ @@ -344,6 +462,12 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, # endif # ifdef BN_LLONG +/******************************************************************* + * Using the long long type, has to be twice as wide as BN_ULONG... + */ +# define Lw(t) (((BN_ULONG)(t))&BN_MASK2) +# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) + # define mul_add(r,a,w,c) { \ BN_ULLONG t; \ t=(BN_ULLONG)w * (a) + (r) + (c); \ @@ -501,10 +625,10 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, } # endif /* !BN_LLONG */ -# if defined(OPENSSL_DOING_MAKEDEPEND) && defined(OPENSSL_FIPS) -# undef bn_div_words -# endif +void BN_RECP_CTX_init(BN_RECP_CTX *recp); +void BN_MONT_CTX_init(BN_MONT_CTX *ctx); +void bn_init(BIGNUM *a); void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); @@ -521,17 +645,27 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t); -void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, - BN_ULONG *t); -BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl); BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); -#ifdef __cplusplus +BIGNUM *int_bn_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, + int *noinv); + +int bn_probable_prime_dh(BIGNUM *rnd, int bits, + const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); + +static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) +{ + if (bits > (INT_MAX - BN_BITS2 + 1)) + return NULL; + + if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) + return a; + + return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); } -#endif #endif diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c index 27b9bdbc3c28..266a3dd3046b 100644 --- a/crypto/bn/bn_lib.c +++ b/crypto/bn/bn_lib.c @@ -1,76 +1,21 @@ -/* crypto/bn/bn_lib.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - #include <assert.h> #include <limits.h> -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" - -const char BN_version[] = "Big Number" OPENSSL_VERSION_PTEXT; +#include <openssl/opensslconf.h> +#include "internal/constant_time_locl.h" /* This stuff appears to be completely unused, so is deprecated */ -#ifndef OPENSSL_NO_DEPRECATED +#if OPENSSL_API_COMPAT < 0x00908000L /*- * For a 32 bit machine * 2 - 4 == 128 @@ -121,15 +66,15 @@ void BN_set_params(int mult, int high, int low, int mont) int BN_get_params(int which) { if (which == 0) - return (bn_limit_bits); + return bn_limit_bits; else if (which == 1) - return (bn_limit_bits_high); + return bn_limit_bits_high; else if (which == 2) - return (bn_limit_bits_low); + return bn_limit_bits_low; else if (which == 3) - return (bn_limit_bits_mont); + return bn_limit_bits_mont; else - return (0); + return 0; } #endif @@ -139,7 +84,7 @@ const BIGNUM *BN_value_one(void) static const BIGNUM const_one = { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA }; - return (&const_one); + return &const_one; } int BN_num_bits_word(BN_ULONG l) @@ -197,44 +142,44 @@ int BN_num_bits(const BIGNUM *a) return ((i * BN_BITS2) + BN_num_bits_word(a->d[i])); } -void BN_clear_free(BIGNUM *a) +static void bn_free_d(BIGNUM *a) { - int i; + if (BN_get_flags(a, BN_FLG_SECURE)) + OPENSSL_secure_free(a->d); + else + OPENSSL_free(a->d); +} + +void BN_clear_free(BIGNUM *a) +{ if (a == NULL) return; - bn_check_top(a); - if (a->d != NULL) { + if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0])); - if (!(BN_get_flags(a, BN_FLG_STATIC_DATA))) - OPENSSL_free(a->d); + bn_free_d(a); } - i = BN_get_flags(a, BN_FLG_MALLOCED); - OPENSSL_cleanse(a, sizeof(BIGNUM)); - if (i) + if (BN_get_flags(a, BN_FLG_MALLOCED)) { + OPENSSL_cleanse(a, sizeof(*a)); OPENSSL_free(a); + } } void BN_free(BIGNUM *a) { if (a == NULL) return; - bn_check_top(a); - if ((a->d != NULL) && !(BN_get_flags(a, BN_FLG_STATIC_DATA))) - OPENSSL_free(a->d); + if (!BN_get_flags(a, BN_FLG_STATIC_DATA)) + bn_free_d(a); if (a->flags & BN_FLG_MALLOCED) OPENSSL_free(a); - else { -#ifndef OPENSSL_NO_DEPRECATED - a->flags |= BN_FLG_FREE; -#endif - a->d = NULL; - } } -void BN_init(BIGNUM *a) +void bn_init(BIGNUM *a) { - memset(a, 0, sizeof(BIGNUM)); + static BIGNUM nilbn; + + *a = nilbn; bn_check_top(a); } @@ -242,28 +187,28 @@ BIGNUM *BN_new(void) { BIGNUM *ret; - if ((ret = (BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL) { + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } ret->flags = BN_FLG_MALLOCED; - ret->top = 0; - ret->neg = 0; - ret->dmax = 0; - ret->d = NULL; bn_check_top(ret); - return (ret); + return ret; } -/* This is used both by bn_expand2() and bn_dup_expand() */ + BIGNUM *BN_secure_new(void) + { + BIGNUM *ret = BN_new(); + if (ret != NULL) + ret->flags |= BN_FLG_SECURE; + return ret; + } + +/* This is used by bn_expand2() */ /* The caller MUST check that words > b->dmax before calling this */ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) { - BN_ULONG *A, *a = NULL; - const BN_ULONG *B; - int i; - - bn_check_top(b); + BN_ULONG *a = NULL; if (words > (INT_MAX / (4 * BN_BITS2))) { BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG); @@ -271,124 +216,25 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) } if (BN_get_flags(b, BN_FLG_STATIC_DATA)) { BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); - return (NULL); + return NULL; } - a = A = (BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG) * words); - if (A == NULL) { + if (BN_get_flags(b, BN_FLG_SECURE)) + a = OPENSSL_secure_zalloc(words * sizeof(*a)); + else + a = OPENSSL_zalloc(words * sizeof(*a)); + if (a == NULL) { BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } -#ifdef PURIFY - /* - * Valgrind complains in BN_consttime_swap because we process the whole - * array even if it's not initialised yet. This doesn't matter in that - * function - what's important is constant time operation (we're not - * actually going to use the data) - */ - memset(a, 0, sizeof(BN_ULONG) * words); -#endif -#if 1 - B = b->d; - /* Check if the previous number needs to be copied */ - if (B != NULL) { - for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) { - /* - * The fact that the loop is unrolled - * 4-wise is a tribute to Intel. It's - * the one that doesn't have enough - * registers to accomodate more data. - * I'd unroll it 8-wise otherwise:-) - * - * <appro@fy.chalmers.se> - */ - BN_ULONG a0, a1, a2, a3; - a0 = B[0]; - a1 = B[1]; - a2 = B[2]; - a3 = B[3]; - A[0] = a0; - A[1] = a1; - A[2] = a2; - A[3] = a3; - } - /* - * workaround for ultrix cc: without 'case 0', the optimizer does - * the switch table by doing a=top&3; a--; goto jump_table[a]; - * which fails for top== 0 - */ - switch (b->top & 3) { - case 3: - A[2] = B[2]; - case 2: - A[1] = B[1]; - case 1: - A[0] = B[0]; - case 0: - ; - } - } -#else - memset(A, 0, sizeof(BN_ULONG) * words); - memcpy(A, b->d, sizeof(b->d[0]) * b->top); -#endif + assert(b->top <= words); + if (b->top > 0) + memcpy(a, b->d, sizeof(*a) * b->top); - return (a); + return a; } /* - * This is an internal function that can be used instead of bn_expand2() when - * there is a need to copy BIGNUMs instead of only expanding the data part, - * while still expanding them. Especially useful when needing to expand - * BIGNUMs that are declared 'const' and should therefore not be changed. The - * reason to use this instead of a BN_dup() followed by a bn_expand2() is - * memory allocation overhead. A BN_dup() followed by a bn_expand2() will - * allocate new memory for the BIGNUM data twice, and free it once, while - * bn_dup_expand() makes sure allocation is made only once. - */ - -#ifndef OPENSSL_NO_DEPRECATED -BIGNUM *bn_dup_expand(const BIGNUM *b, int words) -{ - BIGNUM *r = NULL; - - bn_check_top(b); - - /* - * This function does not work if words <= b->dmax && top < words because - * BN_dup() does not preserve 'dmax'! (But bn_dup_expand() is not used - * anywhere yet.) - */ - - if (words > b->dmax) { - BN_ULONG *a = bn_expand_internal(b, words); - - if (a) { - r = BN_new(); - if (r) { - r->top = b->top; - r->dmax = words; - r->neg = b->neg; - r->d = a; - } else { - /* r == NULL, BN_new failure */ - OPENSSL_free(a); - } - } - /* - * If a == NULL, there was an error in allocation in - * bn_expand_internal(), and NULL should be returned - */ - } else { - r = BN_dup(b); - } - - bn_check_top(r); - return r; -} -#endif - -/* * This is an internal function that should not be used in applications. It * ensures that 'b' has enough room for a 'words' word number and initialises * any unused part of b->d with leading zeros. It is mostly used by the @@ -398,42 +244,18 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words) BIGNUM *bn_expand2(BIGNUM *b, int words) { - bn_check_top(b); - if (words > b->dmax) { BN_ULONG *a = bn_expand_internal(b, words); if (!a) return NULL; - if (b->d) - OPENSSL_free(b->d); + if (b->d) { + OPENSSL_cleanse(b->d, b->dmax * sizeof(b->d[0])); + bn_free_d(b); + } b->d = a; b->dmax = words; } -/* None of this should be necessary because of what b->top means! */ -#if 0 - /* - * NB: bn_wexpand() calls this only if the BIGNUM really has to grow - */ - if (b->top < b->dmax) { - int i; - BN_ULONG *A = &(b->d[b->top]); - for (i = (b->dmax - b->top) >> 3; i > 0; i--, A += 8) { - A[0] = 0; - A[1] = 0; - A[2] = 0; - A[3] = 0; - A[4] = 0; - A[5] = 0; - A[6] = 0; - A[7] = 0; - } - for (i = (b->dmax - b->top) & 7; i > 0; i--, A++) - A[0] = 0; - assert(A == &(b->d[b->dmax])); - } -#endif - bn_check_top(b); return b; } @@ -445,7 +267,7 @@ BIGNUM *BN_dup(const BIGNUM *a) return NULL; bn_check_top(a); - t = BN_new(); + t = BN_get_flags(a, BN_FLG_SECURE) ? BN_secure_new() : BN_new(); if (t == NULL) return NULL; if (!BN_copy(t, a)) { @@ -458,51 +280,29 @@ BIGNUM *BN_dup(const BIGNUM *a) BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) { - int i; - BN_ULONG *A; - const BN_ULONG *B; - bn_check_top(b); if (a == b) - return (a); + return a; if (bn_wexpand(a, b->top) == NULL) - return (NULL); - -#if 1 - A = a->d; - B = b->d; - for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) { - BN_ULONG a0, a1, a2, a3; - a0 = B[0]; - a1 = B[1]; - a2 = B[2]; - a3 = B[3]; - A[0] = a0; - A[1] = a1; - A[2] = a2; - A[3] = a3; - } - /* ultrix cc workaround, see comments in bn_expand_internal */ - switch (b->top & 3) { - case 3: - A[2] = B[2]; - case 2: - A[1] = B[1]; - case 1: - A[0] = B[0]; - case 0:; - } -#else - memcpy(a->d, b->d, sizeof(b->d[0]) * b->top); -#endif + return NULL; + + if (b->top > 0) + memcpy(a->d, b->d, sizeof(b->d[0]) * b->top); - a->top = b->top; a->neg = b->neg; + a->top = b->top; + a->flags |= b->flags & BN_FLG_FIXED_TOP; bn_check_top(a); - return (a); + return a; } +#define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \ + | BN_FLG_CONSTTIME \ + | BN_FLG_SECURE \ + | BN_FLG_FIXED_TOP)) +#define FLAGS_STRUCT(flags) ((flags) & (BN_FLG_MALLOCED)) + void BN_swap(BIGNUM *a, BIGNUM *b) { int flags_old_a, flags_old_b; @@ -530,10 +330,8 @@ void BN_swap(BIGNUM *a, BIGNUM *b) b->dmax = tmp_dmax; b->neg = tmp_neg; - a->flags = - (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA); - b->flags = - (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA); + a->flags = FLAGS_STRUCT(flags_old_a) | FLAGS_DATA(flags_old_b); + b->flags = FLAGS_STRUCT(flags_old_b) | FLAGS_DATA(flags_old_a); bn_check_top(a); bn_check_top(b); } @@ -542,9 +340,10 @@ void BN_clear(BIGNUM *a) { bn_check_top(a); if (a->d != NULL) - OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0])); - a->top = 0; + OPENSSL_cleanse(a->d, sizeof(*a->d) * a->dmax); a->neg = 0; + a->top = 0; + a->flags &= ~BN_FLG_FIXED_TOP; } BN_ULONG BN_get_word(const BIGNUM *a) @@ -561,12 +360,13 @@ int BN_set_word(BIGNUM *a, BN_ULONG w) { bn_check_top(a); if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL) - return (0); + return 0; a->neg = 0; a->d[0] = w; a->top = (w ? 1 : 0); + a->flags &= ~BN_FLG_FIXED_TOP; bn_check_top(a); - return (1); + return 1; } BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) @@ -579,23 +379,25 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) if (ret == NULL) ret = bn = BN_new(); if (ret == NULL) - return (NULL); + return NULL; bn_check_top(ret); - l = 0; + /* Skip leading zero's. */ + for ( ; len > 0 && *s == 0; s++, len--) + continue; n = len; if (n == 0) { ret->top = 0; - return (ret); + return ret; } i = ((n - 1) / BN_BYTES) + 1; m = ((n - 1) % (BN_BYTES)); if (bn_wexpand(ret, (int)i) == NULL) { - if (bn) - BN_free(bn); + BN_free(bn); return NULL; } ret->top = i; ret->neg = 0; + l = 0; while (n--) { l = (l << 8L) | *(s++); if (m-- == 0) { @@ -609,22 +411,129 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) * bit set (-ve number) */ bn_correct_top(ret); - return (ret); + return ret; } /* ignore negative */ +static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen) +{ + int n; + size_t i, lasti, j, atop, mask; + BN_ULONG l; + + /* + * In case |a| is fixed-top, BN_num_bytes can return bogus length, + * but it's assumed that fixed-top inputs ought to be "nominated" + * even for padded output, so it works out... + */ + n = BN_num_bytes(a); + if (tolen == -1) { + tolen = n; + } else if (tolen < n) { /* uncommon/unlike case */ + BIGNUM temp = *a; + + bn_correct_top(&temp); + n = BN_num_bytes(&temp); + if (tolen < n) + return -1; + } + + /* Swipe through whole available data and don't give away padded zero. */ + atop = a->dmax * BN_BYTES; + if (atop == 0) { + OPENSSL_cleanse(to, tolen); + return tolen; + } + + lasti = atop - 1; + atop = a->top * BN_BYTES; + for (i = 0, j = 0, to += tolen; j < (size_t)tolen; j++) { + l = a->d[i / BN_BYTES]; + mask = 0 - ((j - atop) >> (8 * sizeof(i) - 1)); + *--to = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask); + i += (i - lasti) >> (8 * sizeof(i) - 1); /* stay on last limb */ + } + + return tolen; +} + +int BN_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen) +{ + if (tolen < 0) + return -1; + return bn2binpad(a, to, tolen); +} + int BN_bn2bin(const BIGNUM *a, unsigned char *to) { - int n, i; + return bn2binpad(a, to, -1); +} + +BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret) +{ + unsigned int i, m; + unsigned int n; BN_ULONG l; + BIGNUM *bn = NULL; + if (ret == NULL) + ret = bn = BN_new(); + if (ret == NULL) + return NULL; + bn_check_top(ret); + s += len; + /* Skip trailing zeroes. */ + for ( ; len > 0 && s[-1] == 0; s--, len--) + continue; + n = len; + if (n == 0) { + ret->top = 0; + return ret; + } + i = ((n - 1) / BN_BYTES) + 1; + m = ((n - 1) % (BN_BYTES)); + if (bn_wexpand(ret, (int)i) == NULL) { + BN_free(bn); + return NULL; + } + ret->top = i; + ret->neg = 0; + l = 0; + while (n--) { + s--; + l = (l << 8L) | *s; + if (m-- == 0) { + ret->d[--i] = l; + l = 0; + m = BN_BYTES - 1; + } + } + /* + * need to call this due to clear byte at top if avoiding having the top + * bit set (-ve number) + */ + bn_correct_top(ret); + return ret; +} + +int BN_bn2lebinpad(const BIGNUM *a, unsigned char *to, int tolen) +{ + int i; + BN_ULONG l; bn_check_top(a); - n = i = BN_num_bytes(a); + i = BN_num_bytes(a); + if (tolen < i) + return -1; + /* Add trailing zeroes if necessary */ + if (tolen > i) + memset(to + i, 0, tolen - i); + to += i; while (i--) { l = a->d[i / BN_BYTES]; - *(to++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff; + to--; + *to = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff; } - return (n); + return tolen; } int BN_ucmp(const BIGNUM *a, const BIGNUM *b) @@ -637,7 +546,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) i = a->top - b->top; if (i != 0) - return (i); + return i; ap = a->d; bp = b->d; for (i = a->top - 1; i >= 0; i--) { @@ -646,7 +555,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) if (t1 != t2) return ((t1 > t2) ? 1 : -1); } - return (0); + return 0; } int BN_cmp(const BIGNUM *a, const BIGNUM *b) @@ -657,11 +566,11 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) if ((a == NULL) || (b == NULL)) { if (a != NULL) - return (-1); + return -1; else if (b != NULL) - return (1); + return 1; else - return (0); + return 0; } bn_check_top(a); @@ -669,9 +578,9 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) if (a->neg != b->neg) { if (a->neg) - return (-1); + return -1; else - return (1); + return 1; } if (a->neg == 0) { gt = 1; @@ -682,18 +591,18 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) } if (a->top > b->top) - return (gt); + return gt; if (a->top < b->top) - return (lt); + return lt; for (i = a->top - 1; i >= 0; i--) { t1 = a->d[i]; t2 = b->d[i]; if (t1 > t2) - return (gt); + return gt; if (t1 < t2) - return (lt); + return lt; } - return (0); + return 0; } int BN_set_bit(BIGNUM *a, int n) @@ -707,15 +616,16 @@ int BN_set_bit(BIGNUM *a, int n) j = n % BN_BITS2; if (a->top <= i) { if (bn_wexpand(a, i + 1) == NULL) - return (0); + return 0; for (k = a->top; k < i + 1; k++) a->d[k] = 0; a->top = i + 1; + a->flags &= ~BN_FLG_FIXED_TOP; } a->d[i] |= (((BN_ULONG)1) << j); bn_check_top(a); - return (1); + return 1; } int BN_clear_bit(BIGNUM *a, int n) @@ -729,11 +639,11 @@ int BN_clear_bit(BIGNUM *a, int n) i = n / BN_BITS2; j = n % BN_BITS2; if (a->top <= i) - return (0); + return 0; a->d[i] &= (~(((BN_ULONG)1) << j)); bn_correct_top(a); - return (1); + return 1; } int BN_is_bit_set(const BIGNUM *a, int n) @@ -769,7 +679,7 @@ int BN_mask_bits(BIGNUM *a, int n) a->d[w] &= ~(BN_MASK2 << b); } bn_correct_top(a); - return (1); + return 1; } void BN_set_negative(BIGNUM *a, int b) @@ -795,14 +705,14 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n) if (aa != bb) return ((aa > bb) ? 1 : -1); } - return (0); + return 0; } /* * Here follows a specialised variants of bn_cmp_words(). It has the - * property of performing the operation on arrays of different sizes. The + * capability of performing the operation on arrays of different sizes. The * sizes of those arrays is expressed through cl, which is the common length - * ( basicall, min(len(a),len(b)) ), and dl, which is the delta between the + * ( basically, min(len(a),len(b)) ), and dl, which is the delta between the * two lengths, calculated as len(a)-len(b). All lengths are the number of * BN_ULONGs... */ @@ -852,6 +762,34 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) a->top ^= t; b->top ^= t; + t = (a->neg ^ b->neg) & condition; + a->neg ^= t; + b->neg ^= t; + + /*- + * Idea behind BN_FLG_STATIC_DATA is actually to + * indicate that data may not be written to. + * Intention is actually to treat it as it's + * read-only data, and some (if not most) of it does + * reside in read-only segment. In other words + * observation of BN_FLG_STATIC_DATA in + * BN_consttime_swap should be treated as fatal + * condition. It would either cause SEGV or + * effectively cause data corruption. + * BN_FLG_MALLOCED refers to BN structure itself, + * and hence must be preserved. Remaining flags are + * BN_FLG_CONSTIME and BN_FLG_SECURE. Latter must be + * preserved, because it determines how x->d was + * allocated and hence how to free it. This leaves + * BN_FLG_CONSTTIME that one can do something about. + * To summarize it's sufficient to mask and swap + * BN_FLG_CONSTTIME alone. BN_FLG_STATIC_DATA should + * be treated as fatal. + */ + t = ((a->flags ^ b->flags) & BN_FLG_CONSTTIME) & condition; + a->flags ^= t; + b->flags ^= t; + #define BN_CONSTTIME_SWAP(ind) \ do { \ t = (a->d[ind] ^ b->d[ind]) & condition; \ @@ -887,3 +825,160 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) } #undef BN_CONSTTIME_SWAP } + +/* Bits of security, see SP800-57 */ + +int BN_security_bits(int L, int N) +{ + int secbits, bits; + if (L >= 15360) + secbits = 256; + else if (L >= 7680) + secbits = 192; + else if (L >= 3072) + secbits = 128; + else if (L >= 2048) + secbits = 112; + else if (L >= 1024) + secbits = 80; + else + return 0; + if (N == -1) + return secbits; + bits = N / 2; + if (bits < 80) + return 0; + return bits >= secbits ? secbits : bits; +} + +void BN_zero_ex(BIGNUM *a) +{ + a->neg = 0; + a->top = 0; + a->flags &= ~BN_FLG_FIXED_TOP; +} + +int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w) +{ + return ((a->top == 1) && (a->d[0] == w)) || ((w == 0) && (a->top == 0)); +} + +int BN_is_zero(const BIGNUM *a) +{ + return a->top == 0; +} + +int BN_is_one(const BIGNUM *a) +{ + return BN_abs_is_word(a, 1) && !a->neg; +} + +int BN_is_word(const BIGNUM *a, const BN_ULONG w) +{ + return BN_abs_is_word(a, w) && (!w || !a->neg); +} + +int BN_is_odd(const BIGNUM *a) +{ + return (a->top > 0) && (a->d[0] & 1); +} + +int BN_is_negative(const BIGNUM *a) +{ + return (a->neg != 0); +} + +int BN_to_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) +{ + return BN_mod_mul_montgomery(r, a, &(mont->RR), mont, ctx); +} + +void BN_with_flags(BIGNUM *dest, const BIGNUM *b, int flags) +{ + dest->d = b->d; + dest->top = b->top; + dest->dmax = b->dmax; + dest->neg = b->neg; + dest->flags = ((dest->flags & BN_FLG_MALLOCED) + | (b->flags & ~BN_FLG_MALLOCED) + | BN_FLG_STATIC_DATA | flags); +} + +BN_GENCB *BN_GENCB_new(void) +{ + BN_GENCB *ret; + + if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { + BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } + + return ret; +} + +void BN_GENCB_free(BN_GENCB *cb) +{ + if (cb == NULL) + return; + OPENSSL_free(cb); +} + +void BN_set_flags(BIGNUM *b, int n) +{ + b->flags |= n; +} + +int BN_get_flags(const BIGNUM *b, int n) +{ + return b->flags & n; +} + +/* Populate a BN_GENCB structure with an "old"-style callback */ +void BN_GENCB_set_old(BN_GENCB *gencb, void (*callback) (int, int, void *), + void *cb_arg) +{ + BN_GENCB *tmp_gencb = gencb; + tmp_gencb->ver = 1; + tmp_gencb->arg = cb_arg; + tmp_gencb->cb.cb_1 = callback; +} + +/* Populate a BN_GENCB structure with a "new"-style callback */ +void BN_GENCB_set(BN_GENCB *gencb, int (*callback) (int, int, BN_GENCB *), + void *cb_arg) +{ + BN_GENCB *tmp_gencb = gencb; + tmp_gencb->ver = 2; + tmp_gencb->arg = cb_arg; + tmp_gencb->cb.cb_2 = callback; +} + +void *BN_GENCB_get_arg(BN_GENCB *cb) +{ + return cb->arg; +} + +BIGNUM *bn_wexpand(BIGNUM *a, int words) +{ + return (words <= a->dmax) ? a : bn_expand2(a, words); +} + +void bn_correct_top(BIGNUM *a) +{ + BN_ULONG *ftl; + int tmp_top = a->top; + + if (tmp_top > 0) { + for (ftl = &(a->d[tmp_top]); tmp_top > 0; tmp_top--) { + ftl--; + if (*ftl != 0) + break; + } + a->top = tmp_top; + } + if (a->top == 0) + a->neg = 0; + a->flags &= ~BN_FLG_FIXED_TOP; + bn_pollute(a); +} diff --git a/crypto/bn/bn_mod.c b/crypto/bn/bn_mod.c index ffbce890cf28..712fc8ac145b 100644 --- a/crypto/bn/bn_mod.c +++ b/crypto/bn/bn_mod.c @@ -1,129 +1,15 @@ -/* crypto/bn/bn_mod.c */ /* - * Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> - * for the OpenSSL project. - */ -/* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" -#if 0 /* now just a #define */ -int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) -{ - return (BN_div(NULL, rem, m, d, ctx)); - /* note that rem->neg == m->neg (unless the remainder is zero) */ -} -#endif - int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) { /* @@ -149,18 +35,74 @@ int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, /* * BN_mod_add variant that may be used if both a and b are non-negative and - * less than m + * less than m. The original algorithm was + * + * if (!BN_uadd(r, a, b)) + * return 0; + * if (BN_ucmp(r, m) >= 0) + * return BN_usub(r, r, m); + * + * which is replaced with addition, subtracting modulus, and conditional + * move depending on whether or not subtraction borrowed. */ -int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *m) +int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m) { - if (!BN_uadd(r, a, b)) + size_t i, ai, bi, mtop = m->top; + BN_ULONG storage[1024 / BN_BITS2]; + BN_ULONG carry, temp, mask, *rp, *tp = storage; + const BN_ULONG *ap, *bp; + + if (bn_wexpand(r, mtop) == NULL) + return 0; + + if (mtop > sizeof(storage) / sizeof(storage[0]) + && (tp = OPENSSL_malloc(mtop * sizeof(BN_ULONG))) == NULL) return 0; - if (BN_ucmp(r, m) >= 0) - return BN_usub(r, r, m); + + ap = a->d != NULL ? a->d : tp; + bp = b->d != NULL ? b->d : tp; + + for (i = 0, ai = 0, bi = 0, carry = 0; i < mtop;) { + mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1)); + temp = ((ap[ai] & mask) + carry) & BN_MASK2; + carry = (temp < carry); + + mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1)); + tp[i] = ((bp[bi] & mask) + temp) & BN_MASK2; + carry += (tp[i] < temp); + + i++; + ai += (i - a->dmax) >> (8 * sizeof(i) - 1); + bi += (i - b->dmax) >> (8 * sizeof(i) - 1); + } + rp = r->d; + carry -= bn_sub_words(rp, tp, m->d, mtop); + for (i = 0; i < mtop; i++) { + rp[i] = (carry & tp[i]) | (~carry & rp[i]); + ((volatile BN_ULONG *)tp)[i] = 0; + } + r->top = mtop; + r->flags |= BN_FLG_FIXED_TOP; + r->neg = 0; + + if (tp != storage) + OPENSSL_free(tp); + return 1; } +int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m) +{ + int ret = bn_mod_add_fixed_top(r, a, b, m); + + if (ret) + bn_correct_top(r); + + return ret; +} + int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) { @@ -170,6 +112,70 @@ int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, } /* + * BN_mod_sub variant that may be used if both a and b are non-negative, + * a is less than m, while b is of same bit width as m. It's implemented + * as subtraction followed by two conditional additions. + * + * 0 <= a < m + * 0 <= b < 2^w < 2*m + * + * after subtraction + * + * -2*m < r = a - b < m + * + * Thus it takes up to two conditional additions to make |r| positive. + */ +int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m) +{ + size_t i, ai, bi, mtop = m->top; + BN_ULONG borrow, carry, ta, tb, mask, *rp; + const BN_ULONG *ap, *bp; + + if (bn_wexpand(r, mtop) == NULL) + return 0; + + rp = r->d; + ap = a->d != NULL ? a->d : rp; + bp = b->d != NULL ? b->d : rp; + + for (i = 0, ai = 0, bi = 0, borrow = 0; i < mtop;) { + mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1)); + ta = ap[ai] & mask; + + mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1)); + tb = bp[bi] & mask; + rp[i] = ta - tb - borrow; + if (ta != tb) + borrow = (ta < tb); + + i++; + ai += (i - a->dmax) >> (8 * sizeof(i) - 1); + bi += (i - b->dmax) >> (8 * sizeof(i) - 1); + } + ap = m->d; + for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) { + ta = ((ap[i] & mask) + carry) & BN_MASK2; + carry = (ta < carry); + rp[i] = (rp[i] + ta) & BN_MASK2; + carry += (rp[i] < ta); + } + borrow -= carry; + for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) { + ta = ((ap[i] & mask) + carry) & BN_MASK2; + carry = (ta < carry); + rp[i] = (rp[i] + ta) & BN_MASK2; + carry += (rp[i] < ta); + } + + r->top = mtop; + r->flags |= BN_FLG_FIXED_TOP; + r->neg = 0; + + return 1; +} + +/* * BN_mod_sub variant that may be used if both a and b are non-negative and * less than m */ @@ -210,7 +216,7 @@ int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, ret = 1; err: BN_CTX_end(ctx); - return (ret); + return ret; } int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) @@ -262,8 +268,7 @@ int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m)); bn_check_top(r); - if (abs_m) - BN_free(abs_m); + BN_free(abs_m); return ret; } diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c index c1703650ef1e..393d27c392c7 100644 --- a/crypto/bn/bn_mont.c +++ b/crypto/bn/bn_mont.c @@ -1,112 +1,10 @@ -/* crypto/bn/bn_mont.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ /* @@ -116,36 +14,49 @@ * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" #define MONT_WORD /* use the faster word-based algorithm */ #ifdef MONT_WORD -static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); +static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); #endif int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx) { + int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + BN_MONT_CTX *mont, BN_CTX *ctx) +{ BIGNUM *tmp; int ret = 0; -#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) int num = mont->N.top; +#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) if (num > 1 && a->top == num && b->top == num) { if (bn_wexpand(r, num) == NULL) - return (0); + return 0; if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { r->neg = a->neg ^ b->neg; r->top = num; - bn_correct_top(r); - return (1); + r->flags |= BN_FLG_FIXED_TOP; + return 1; } } #endif + if ((a->top + b->top) > 2 * num) + return 0; + BN_CTX_start(ctx); tmp = BN_CTX_get(ctx); if (tmp == NULL) @@ -153,58 +64,57 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, bn_check_top(tmp); if (a == b) { - if (!BN_sqr(tmp, a, ctx)) + if (!bn_sqr_fixed_top(tmp, a, ctx)) goto err; } else { - if (!BN_mul(tmp, a, b, ctx)) + if (!bn_mul_fixed_top(tmp, a, b, ctx)) goto err; } /* reduce from aRR to aR */ #ifdef MONT_WORD - if (!BN_from_montgomery_word(r, tmp, mont)) + if (!bn_from_montgomery_word(r, tmp, mont)) goto err; #else if (!BN_from_montgomery(r, tmp, mont, ctx)) goto err; #endif - bn_check_top(r); ret = 1; err: BN_CTX_end(ctx); - return (ret); + return ret; } #ifdef MONT_WORD -static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) +static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; BN_ULONG *ap, *np, *rp, n0, v, carry; int nl, max, i; + unsigned int rtop; n = &(mont->N); nl = n->top; if (nl == 0) { ret->top = 0; - return (1); + return 1; } max = (2 * nl); /* carry is stored separately */ if (bn_wexpand(r, max) == NULL) - return (0); + return 0; r->neg ^= n->neg; np = n->d; rp = r->d; /* clear the top words of T */ -# if 1 - for (i = r->top; i < max; i++) /* memset? XXX */ - rp[i] = 0; -# else - memset(&(rp[r->top]), 0, (max - r->top) * sizeof(BN_ULONG)); -# endif + for (rtop = r->top, i = 0; i < max; i++) { + v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1)); + rp[i] &= v; + } r->top = max; + r->flags |= BN_FLG_FIXED_TOP; n0 = mont->n0[0]; /* @@ -221,8 +131,9 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) } if (bn_wexpand(ret, nl) == NULL) - return (0); + return 0; ret->top = nl; + ret->flags |= BN_FLG_FIXED_TOP; ret->neg = r->neg; rp = ret->d; @@ -233,35 +144,44 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) */ ap = &(r->d[nl]); + carry -= bn_sub_words(rp, ap, np, nl); /* - * |v| is one if |ap| - |np| underflowed or zero if it did not. Note |v| - * cannot be -1. That would imply the subtraction did not fit in |nl| words, - * and we know at most one subtraction is needed. + * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note + * |carry| cannot be 1. That would imply the subtraction did not fit in + * |nl| words, and we know at most one subtraction is needed. */ - v = bn_sub_words(rp, ap, np, nl) - carry; - v = 0 - v; for (i = 0; i < nl; i++) { - rp[i] = (v & ap[i]) | (~v & rp[i]); + rp[i] = (carry & ap[i]) | (~carry & rp[i]); ap[i] = 0; } - bn_correct_top(r); - bn_correct_top(ret); - bn_check_top(ret); - return (1); + return 1; } #endif /* MONT_WORD */ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_CTX *ctx) { + int retn; + + retn = bn_from_mont_fixed_top(ret, a, mont, ctx); + bn_correct_top(ret); + bn_check_top(ret); + + return retn; +} + +int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) +{ int retn = 0; #ifdef MONT_WORD BIGNUM *t; BN_CTX_start(ctx); - if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) - retn = BN_from_montgomery_word(ret, t, mont); + if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) { + retn = bn_from_montgomery_word(ret, t, mont); + } BN_CTX_end(ctx); #else /* !MONT_WORD */ BIGNUM *t1, *t2; @@ -269,7 +189,7 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_CTX_start(ctx); t1 = BN_CTX_get(ctx); t2 = BN_CTX_get(ctx); - if (t1 == NULL || t2 == NULL) + if (t2 == NULL) goto err; if (!BN_copy(t1, a)) @@ -296,27 +216,35 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, err: BN_CTX_end(ctx); #endif /* MONT_WORD */ - return (retn); + return retn; +} + +int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) +{ + return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx); } BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret; - if ((ret = (BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL) - return (NULL); + if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { + BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } BN_MONT_CTX_init(ret); ret->flags = BN_FLG_MALLOCED; - return (ret); + return ret; } void BN_MONT_CTX_init(BN_MONT_CTX *ctx) { ctx->ri = 0; - BN_init(&(ctx->RR)); - BN_init(&(ctx->N)); - BN_init(&(ctx->Ni)); + bn_init(&ctx->RR); + bn_init(&ctx->N); + bn_init(&ctx->Ni); ctx->n0[0] = ctx->n0[1] = 0; ctx->flags = 0; } @@ -325,17 +253,16 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) { if (mont == NULL) return; - - BN_clear_free(&(mont->RR)); - BN_clear_free(&(mont->N)); - BN_clear_free(&(mont->Ni)); + BN_clear_free(&mont->RR); + BN_clear_free(&mont->N); + BN_clear_free(&mont->Ni); if (mont->flags & BN_FLG_MALLOCED) OPENSSL_free(mont); } int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { - int ret = 0; + int i, ret = 0; BIGNUM *Ri, *R; if (BN_is_zero(mod)) @@ -356,7 +283,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BIGNUM tmod; BN_ULONG buf[2]; - BN_init(&tmod); + bn_init(&tmod); tmod.d = buf; tmod.dmax = 2; tmod.neg = 0; @@ -384,7 +311,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) if ((buf[1] = mod->top > 1 ? mod->d[1] : 0)) tmod.top = 2; - if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) + if (BN_is_one(&tmod)) + BN_zero(Ri); + else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) goto err; if (!BN_lshift(Ri, Ri, 2 * BN_BITS2)) goto err; /* R*Ri */ @@ -417,7 +346,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) buf[1] = 0; tmod.top = buf[0] != 0 ? 1 : 0; /* Ri = R^-1 mod N */ - if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) + if (BN_is_one(&tmod)) + BN_zero(Ri); + else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) goto err; if (!BN_lshift(Ri, Ri, BN_BITS2)) goto err; /* R*Ri */ @@ -466,6 +397,11 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx)) goto err; + for (i = mont->RR.top, ret = mont->N.top; i < ret; i++) + mont->RR.d[i] = 0; + mont->RR.top = ret; + mont->RR.flags |= BN_FLG_FIXED_TOP; + ret = 1; err: BN_CTX_end(ctx); @@ -475,7 +411,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) { if (to == from) - return (to); + return to; if (!BN_copy(&(to->RR), &(from->RR))) return NULL; @@ -486,17 +422,17 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) to->ri = from->ri; to->n0[0] = from->n0[0]; to->n0[1] = from->n0[1]; - return (to); + return to; } -BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, +BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock, const BIGNUM *mod, BN_CTX *ctx) { BN_MONT_CTX *ret; - CRYPTO_r_lock(lock); + CRYPTO_THREAD_read_lock(lock); ret = *pmont; - CRYPTO_r_unlock(lock); + CRYPTO_THREAD_unlock(lock); if (ret) return ret; @@ -509,7 +445,7 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, * (the losers throw away the work they've done). */ ret = BN_MONT_CTX_new(); - if (!ret) + if (ret == NULL) return NULL; if (!BN_MONT_CTX_set(ret, mod, ctx)) { BN_MONT_CTX_free(ret); @@ -517,12 +453,12 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, } /* The locked compare-and-set, after the local work is done. */ - CRYPTO_w_lock(lock); + CRYPTO_THREAD_write_lock(lock); if (*pmont) { BN_MONT_CTX_free(ret); ret = *pmont; } else *pmont = ret; - CRYPTO_w_unlock(lock); + CRYPTO_THREAD_unlock(lock); return ret; } diff --git a/crypto/bn/bn_mpi.c b/crypto/bn/bn_mpi.c index 3bd40bbd2bca..043e21d26a38 100644 --- a/crypto/bn/bn_mpi.c +++ b/crypto/bn/bn_mpi.c @@ -1,63 +1,14 @@ -/* crypto/bn/bn_mpi.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" int BN_bn2mpi(const BIGNUM *a, unsigned char *d) @@ -88,41 +39,48 @@ int BN_bn2mpi(const BIGNUM *a, unsigned char *d) return (num + 4 + ext); } -BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a) +BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *ain) { long len; int neg = 0; + BIGNUM *a = NULL; if (n < 4) { BNerr(BN_F_BN_MPI2BN, BN_R_INVALID_LENGTH); - return (NULL); + return NULL; } len = ((long)d[0] << 24) | ((long)d[1] << 16) | ((int)d[2] << 8) | (int) d[3]; if ((len + 4) != n) { BNerr(BN_F_BN_MPI2BN, BN_R_ENCODING_ERROR); - return (NULL); + return NULL; } - if (a == NULL) + if (ain == NULL) a = BN_new(); + else + a = ain; + if (a == NULL) - return (NULL); + return NULL; if (len == 0) { a->neg = 0; a->top = 0; - return (a); + return a; } d += 4; if ((*d) & 0x80) neg = 1; - if (BN_bin2bn(d, (int)len, a) == NULL) - return (NULL); + if (BN_bin2bn(d, (int)len, a) == NULL) { + if (ain == NULL) + BN_free(a); + return NULL; + } a->neg = neg; if (neg) { BN_clear_bit(a, BN_num_bits(a) - 1); } bn_check_top(a); - return (a); + return a; } diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c index 6b455a755f71..5eda65cfbb04 100644 --- a/crypto/bn/bn_mul.c +++ b/crypto/bn/bn_mul.c @@ -1,69 +1,14 @@ -/* crypto/bn/bn_mul.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - -#include <stdio.h> #include <assert.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) @@ -71,7 +16,7 @@ * Here follows specialised variants of bn_add_words() and bn_sub_words(). * They have the property performing operations on arrays of different sizes. * The sizes of those arrays is expressed through cl, which is the common - * length ( basicall, min(len(a),len(b)) ), and dl, which is the delta + * length ( basically, min(len(a),len(b)) ), and dl, which is the delta * between the two lengths, calculated as len(a)-len(b). All lengths are the * number of BN_ULONGs... For the operations that require a result array as * parameter, it must have the length cl+abs(dl). These functions should @@ -96,10 +41,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, b += cl; if (dl < 0) { -# ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, - dl, c); -# endif for (;;) { t = b[0]; r[0] = (0 - t - c) & BN_MASK2; @@ -134,10 +75,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, } } else { int save_dl = dl; -# ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, - dl, c); -# endif while (c) { t = a[0]; r[0] = (t - c) & BN_MASK2; @@ -172,20 +109,18 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, r += 4; } if (dl > 0) { -# ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", - cl, dl); -# endif if (save_dl > dl) { switch (save_dl - dl) { case 1: r[1] = a[1]; if (--dl <= 0) break; + /* fall thru */ case 2: r[2] = a[2]; if (--dl <= 0) break; + /* fall thru */ case 3: r[3] = a[3]; if (--dl <= 0) @@ -196,10 +131,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, } } if (dl > 0) { -# ifdef BN_COUNT - fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", - cl, dl); -# endif for (;;) { r[0] = a[0]; if (--dl <= 0) @@ -223,189 +154,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, } #endif -BN_ULONG bn_add_part_words(BN_ULONG *r, - const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl) -{ - BN_ULONG c, l, t; - - assert(cl >= 0); - c = bn_add_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) { - int save_dl = dl; -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, - dl, c); -#endif - while (c) { - l = (c + b[0]) & BN_MASK2; - c = (l < c); - r[0] = l; - if (++dl >= 0) - break; - - l = (c + b[1]) & BN_MASK2; - c = (l < c); - r[1] = l; - if (++dl >= 0) - break; - - l = (c + b[2]) & BN_MASK2; - c = (l < c); - r[2] = l; - if (++dl >= 0) - break; - - l = (c + b[3]) & BN_MASK2; - c = (l < c); - r[3] = l; - if (++dl >= 0) - break; - - save_dl = dl; - b += 4; - r += 4; - } - if (dl < 0) { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", - cl, dl); -#endif - if (save_dl < dl) { - switch (dl - save_dl) { - case 1: - r[1] = b[1]; - if (++dl >= 0) - break; - case 2: - r[2] = b[2]; - if (++dl >= 0) - break; - case 3: - r[3] = b[3]; - if (++dl >= 0) - break; - } - b += 4; - r += 4; - } - } - if (dl < 0) { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", - cl, dl); -#endif - for (;;) { - r[0] = b[0]; - if (++dl >= 0) - break; - r[1] = b[1]; - if (++dl >= 0) - break; - r[2] = b[2]; - if (++dl >= 0) - break; - r[3] = b[3]; - if (++dl >= 0) - break; - - b += 4; - r += 4; - } - } - } else { - int save_dl = dl; -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); -#endif - while (c) { - t = (a[0] + c) & BN_MASK2; - c = (t < c); - r[0] = t; - if (--dl <= 0) - break; - - t = (a[1] + c) & BN_MASK2; - c = (t < c); - r[1] = t; - if (--dl <= 0) - break; - - t = (a[2] + c) & BN_MASK2; - c = (t < c); - r[2] = t; - if (--dl <= 0) - break; - - t = (a[3] + c) & BN_MASK2; - c = (t < c); - r[3] = t; - if (--dl <= 0) - break; - - save_dl = dl; - a += 4; - r += 4; - } -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, - dl); -#endif - if (dl > 0) { - if (save_dl > dl) { - switch (save_dl - dl) { - case 1: - r[1] = a[1]; - if (--dl <= 0) - break; - case 2: - r[2] = a[2]; - if (--dl <= 0) - break; - case 3: - r[3] = a[3]; - if (--dl <= 0) - break; - } - a += 4; - r += 4; - } - } - if (dl > 0) { -#ifdef BN_COUNT - fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", - cl, dl); -#endif - for (;;) { - r[0] = a[0]; - if (--dl <= 0) - break; - r[1] = a[1]; - if (--dl <= 0) - break; - r[2] = a[2]; - if (--dl <= 0) - break; - r[3] = a[3]; - if (--dl <= 0) - break; - - a += 4; - r += 4; - } - } - } - return c; -} - #ifdef BN_RECURSION /* * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of @@ -432,9 +180,6 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, unsigned int neg, zero; BN_ULONG ln, lo, *p; -# ifdef BN_COUNT - fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n", n2, dna, n2, dnb); -# endif # ifdef BN_MUL_COMBA # if 0 if (n2 == 4) { @@ -501,7 +246,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, if (!zero) bn_mul_comba4(&(t[n2]), t, &(t[n])); else - memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); + memset(&t[n2], 0, sizeof(*t) * 8); bn_mul_comba4(r, a, b); bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); @@ -511,7 +256,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, if (!zero) bn_mul_comba8(&(t[n2]), t, &(t[n])); else - memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); + memset(&t[n2], 0, sizeof(*t) * 16); bn_mul_comba8(r, a, b); bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); @@ -522,7 +267,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, if (!zero) bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); else - memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); + memset(&t[n2], 0, sizeof(*t) * n2); bn_mul_recursive(r, a, b, n, 0, 0, p); bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); } @@ -581,10 +326,6 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int c1, c2, neg; BN_ULONG ln, lo, *p; -# ifdef BN_COUNT - fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n", - n, tna, n, tnb); -# endif if (n < 8) { bn_mul_normal(r, a, n + tna, b, n + tnb); return; @@ -600,7 +341,6 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ break; case -3: - /* break; */ case -2: bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ @@ -609,14 +349,12 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, case -1: case 0: case 1: - /* break; */ case 2: bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ neg = 1; break; case 3: - /* break; */ case 4: bn_sub_part_words(t, a, &(a[n]), tna, n - tna); bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); @@ -631,14 +369,14 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, bn_mul_comba4(&(t[n2]), t, &(t[n])); bn_mul_comba4(r, a, b); bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); - memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2)); + memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2)); } else # endif if (n == 8) { bn_mul_comba8(&(t[n2]), t, &(t[n])); bn_mul_comba8(r, a, b); bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); - memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb)); + memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb)); } else { p = &(t[n2 * 2]); bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); @@ -654,7 +392,7 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, if (j == 0) { bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p); - memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2)); + memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2)); } else if (j > 0) { /* eg, n == 16, i == 8 and tn == 11 */ bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p); @@ -662,7 +400,7 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, sizeof(BN_ULONG) * (n2 - tna - tnb)); } else { /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ - memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); + memset(&r[n2], 0, sizeof(*r) * n2); if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); @@ -741,10 +479,6 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, { int n = n2 / 2; -# ifdef BN_COUNT - fprintf(stderr, " bn_mul_low_recursive %d * %d\n", n2, n2); -# endif - bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); @@ -758,183 +492,19 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); } } +#endif /* BN_RECURSION */ -/*- - * a and b must be the same size, which is n2. - * r needs to be n2 words and t needs to be n2*2 - * l is the low words of the output. - * t needs to be n2*3 - */ -void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, - BN_ULONG *t) +int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { - int i, n; - int c1, c2; - int neg, oneg, zero; - BN_ULONG ll, lc, *lp, *mp; - -# ifdef BN_COUNT - fprintf(stderr, " bn_mul_high %d * %d\n", n2, n2); -# endif - n = n2 / 2; + int ret = bn_mul_fixed_top(r, a, b, ctx); - /* Calculate (al-ah)*(bh-bl) */ - neg = zero = 0; - c1 = bn_cmp_words(&(a[0]), &(a[n]), n); - c2 = bn_cmp_words(&(b[n]), &(b[0]), n); - switch (c1 * 3 + c2) { - case -4: - bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); - bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); - break; - case -3: - zero = 1; - break; - case -2: - bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); - bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); - neg = 1; - break; - case -1: - case 0: - case 1: - zero = 1; - break; - case 2: - bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); - bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); - neg = 1; - break; - case 3: - zero = 1; - break; - case 4: - bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); - bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); - break; - } - - oneg = neg; - /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ - /* r[10] = (a[1]*b[1]) */ -# ifdef BN_MUL_COMBA - if (n == 8) { - bn_mul_comba8(&(t[0]), &(r[0]), &(r[n])); - bn_mul_comba8(r, &(a[n]), &(b[n])); - } else -# endif - { - bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2])); - bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2])); - } - - /*- - * s0 == low(al*bl) - * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) - * We know s0 and s1 so the only unknown is high(al*bl) - * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) - * high(al*bl) == s1 - (r[0]+l[0]+t[0]) - */ - if (l != NULL) { - lp = &(t[n2 + n]); - c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n)); - } else { - c1 = 0; - lp = &(r[0]); - } - - if (neg) - neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n)); - else { - bn_add_words(&(t[n2]), lp, &(t[0]), n); - neg = 0; - } - - if (l != NULL) { - bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n); - } else { - lp = &(t[n2 + n]); - mp = &(t[n2]); - for (i = 0; i < n; i++) - lp[i] = ((~mp[i]) + 1) & BN_MASK2; - } - - /*- - * s[0] = low(al*bl) - * t[3] = high(al*bl) - * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign - * r[10] = (a[1]*b[1]) - */ - /*- - * R[10] = al*bl - * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) - * R[32] = ah*bh - */ - /*- - * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) - * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) - * R[3]=r[1]+(carry/borrow) - */ - if (l != NULL) { - lp = &(t[n2]); - c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n)); - } else { - lp = &(t[n2 + n]); - c1 = 0; - } - c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n)); - if (oneg) - c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n)); - else - c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n)); - - c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n)); - c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n)); - if (oneg) - c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n)); - else - c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n)); + bn_correct_top(r); + bn_check_top(r); - if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */ - i = 0; - if (c1 > 0) { - lc = c1; - do { - ll = (r[i] + lc) & BN_MASK2; - r[i++] = ll; - lc = (lc > ll); - } while (lc); - } else { - lc = -c1; - do { - ll = r[i]; - r[i++] = (ll - lc) & BN_MASK2; - lc = (lc > ll); - } while (lc); - } - } - if (c2 != 0) { /* Add starting at r[1] */ - i = n; - if (c2 > 0) { - lc = c2; - do { - ll = (r[i] + lc) & BN_MASK2; - r[i++] = ll; - lc = (lc > ll); - } while (lc); - } else { - lc = -c2; - do { - ll = r[i]; - r[i++] = (ll - lc) & BN_MASK2; - lc = (lc > ll); - } while (lc); - } - } + return ret; } -#endif /* BN_RECURSION */ -int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) +int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { int ret = 0; int top, al, bl; @@ -947,10 +517,6 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) int j = 0, k; #endif -#ifdef BN_COUNT - fprintf(stderr, "BN_mul %d * %d\n", a->top, b->top); -#endif - bn_check_top(a); bn_check_top(b); bn_check_top(r); @@ -960,7 +526,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) if ((al == 0) || (bl == 0)) { BN_zero(r); - return (1); + return 1; } top = al + bl; @@ -970,7 +536,6 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) goto err; } else rr = r; - rr->neg = a->neg ^ b->neg; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) i = al - bl; @@ -1042,7 +607,8 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) end: #endif - bn_correct_top(rr); + rr->neg = a->neg ^ b->neg; + rr->flags |= BN_FLG_FIXED_TOP; if (r != rr && BN_copy(r, rr) == NULL) goto err; @@ -1050,17 +616,13 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) err: bn_check_top(r); BN_CTX_end(ctx); - return (ret); + return ret; } void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) { BN_ULONG *rr; -#ifdef BN_COUNT - fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb); -#endif - if (na < nb) { int itmp; BN_ULONG *ltmp; @@ -1101,9 +663,6 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) { -#ifdef BN_COUNT - fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n); -#endif bn_mul_words(r, a, n, b[0]); for (;;) { diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index 4a45404c6f89..dcdd321c6688 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -1,63 +1,14 @@ -/* crypto/bn/bn_nist.c */ /* - * Written by Nils Larsch for the OpenSSL project - */ -/* ==================================================================== - * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). + * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include "bn_lcl.h" -#include "cryptlib.h" +#include "internal/cryptlib.h" #define BN_NIST_192_TOP (192+BN_BITS2-1)/BN_BITS2 #define BN_NIST_224_TOP (224+BN_BITS2-1)/BN_BITS2 @@ -303,7 +254,7 @@ static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max) int i; #ifdef BN_DEBUG - OPENSSL_assert(top <= max); + (void)ossl_assert(top <= max); #endif for (i = 0; i < top; i++) dst[i] = src[i]; @@ -379,8 +330,8 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, PTR_SIZE_INT mask; static const BIGNUM _bignum_nist_p_192_sqr = { (BN_ULONG *)_nist_p_192_sqr, - sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]), - sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]), + OSSL_NELEM(_nist_p_192_sqr), + OSSL_NELEM(_nist_p_192_sqr), 0, BN_FLG_STATIC_DATA }; @@ -524,8 +475,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, } u; static const BIGNUM _bignum_nist_p_224_sqr = { (BN_ULONG *)_nist_p_224_sqr, - sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]), - sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]), + OSSL_NELEM(_nist_p_224_sqr), + OSSL_NELEM(_nist_p_224_sqr), 0, BN_FLG_STATIC_DATA }; @@ -645,7 +596,7 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, #endif } else if (carry < 0) { /* - * it's a bit more comlicated logic in this case. if bn_add_words + * it's a bit more complicated logic in this case. if bn_add_words * yields no carry, then result has to be adjusted by unconditionally * *adding* the modulus. but if it does, then result has to be * compared to the modulus and conditionally adjusted by @@ -705,8 +656,8 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, } u; static const BIGNUM _bignum_nist_p_256_sqr = { (BN_ULONG *)_nist_p_256_sqr, - sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]), - sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]), + OSSL_NELEM(_nist_p_256_sqr), + OSSL_NELEM(_nist_p_256_sqr), 0, BN_FLG_STATIC_DATA }; @@ -951,8 +902,8 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, } u; static const BIGNUM _bignum_nist_p_384_sqr = { (BN_ULONG *)_nist_p_384_sqr, - sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]), - sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]), + OSSL_NELEM(_nist_p_384_sqr), + OSSL_NELEM(_nist_p_384_sqr), 0, BN_FLG_STATIC_DATA }; @@ -1209,8 +1160,8 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, PTR_SIZE_INT mask; static const BIGNUM _bignum_nist_p_521_sqr = { (BN_ULONG *)_nist_p_521_sqr, - sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]), - sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]), + OSSL_NELEM(_nist_p_521_sqr), + OSSL_NELEM(_nist_p_521_sqr), 0, BN_FLG_STATIC_DATA }; @@ -1239,9 +1190,20 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP); /* ... and right shift */ for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) { +#if 0 + /* + * MSC ARM compiler [version 2013, presumably even earlier, + * much earlier] miscompiles this code, but not one in + * #else section. See RT#3541. + */ + tmp = val >> BN_NIST_521_RSHIFT; + val = t_d[i + 1]; + t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2; +#else t_d[i] = (val >> BN_NIST_521_RSHIFT | (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2; val = tmp; +#endif } t_d[i] = val >> BN_NIST_521_RSHIFT; /* lower 521 bits */ @@ -1260,3 +1222,18 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, return 1; } + +int (*BN_nist_mod_func(const BIGNUM *p)) (BIGNUM *r, const BIGNUM *a, + const BIGNUM *field, BN_CTX *ctx) { + if (BN_ucmp(&_bignum_nist_p_192, p) == 0) + return BN_nist_mod_192; + if (BN_ucmp(&_bignum_nist_p_224, p) == 0) + return BN_nist_mod_224; + if (BN_ucmp(&_bignum_nist_p_256, p) == 0) + return BN_nist_mod_256; + if (BN_ucmp(&_bignum_nist_p_384, p) == 0) + return BN_nist_mod_384; + if (BN_ucmp(&_bignum_nist_p_521, p) == 0) + return BN_nist_mod_521; + return 0; +} diff --git a/crypto/bn/bn_prime.c b/crypto/bn/bn_prime.c index e911e157859a..b91b31b1f304 100644 --- a/crypto/bn/bn_prime.c +++ b/crypto/bn/bn_prime.c @@ -1,125 +1,16 @@ -/* crypto/bn/bn_prime.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> #include <time.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" -#include <openssl/rand.h> - -/* - * NB: these functions have been "upgraded", the deprecated versions (which - * are compatibility wrappers using these functions) are in bn_depr.c. - - * Geoff - */ /* * The quick sieve algorithm approach to weeding out primes is Philip @@ -131,12 +22,10 @@ static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont); -static int probable_prime(BIGNUM *rnd, int bits); -static int probable_prime_dh(BIGNUM *rnd, int bits, - const BIGNUM *add, const BIGNUM *rem, - BN_CTX *ctx); -static int probable_prime_dh_safe(BIGNUM *rnd, int bits, const BIGNUM *add, - const BIGNUM *rem, BN_CTX *ctx); +static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods); +static int probable_prime_dh_safe(BIGNUM *rnd, int bits, + const BIGNUM *add, const BIGNUM *rem, + BN_CTX *ctx); int BN_GENCB_call(BN_GENCB *cb, int a, int b) { @@ -166,31 +55,46 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, BIGNUM *t; int found = 0; int i, j, c1 = 0; - BN_CTX *ctx; + BN_CTX *ctx = NULL; + prime_t *mods = NULL; int checks = BN_prime_checks_for_size(bits); + if (bits < 2) { + /* There are no prime numbers this small. */ + BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL); + return 0; + } else if (bits == 2 && safe) { + /* The smallest safe prime (7) is three bits. */ + BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL); + return 0; + } + + mods = OPENSSL_zalloc(sizeof(*mods) * NUMPRIMES); + if (mods == NULL) + goto err; + ctx = BN_CTX_new(); if (ctx == NULL) goto err; BN_CTX_start(ctx); t = BN_CTX_get(ctx); - if (!t) + if (t == NULL) goto err; loop: /* make a random number and set the top and bottom bits */ if (add == NULL) { - if (!probable_prime(ret, bits)) + if (!probable_prime(ret, bits, mods)) goto err; } else { if (safe) { if (!probable_prime_dh_safe(ret, bits, add, rem, ctx)) goto err; } else { - if (!probable_prime_dh(ret, bits, add, rem, ctx)) + if (!bn_probable_prime_dh(ret, bits, add, rem, ctx)) goto err; } } - /* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */ + if (!BN_GENCB_call(cb, 0, c1++)) /* aborted */ goto err; @@ -230,10 +134,10 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, /* we have a prime :-) */ found = 1; err: - if (ctx != NULL) { + OPENSSL_free(mods); + if (ctx != NULL) BN_CTX_end(ctx); - BN_CTX_free(ctx); - } + BN_CTX_free(ctx); bn_check_top(ret); return found; } @@ -250,23 +154,29 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, int i, j, ret = -1; int k; BN_CTX *ctx = NULL; - BIGNUM *A1, *A1_odd, *check; /* taken from ctx */ + BIGNUM *A1, *A1_odd, *A3, *check; /* taken from ctx */ BN_MONT_CTX *mont = NULL; - if (BN_cmp(a, BN_value_one()) <= 0) + /* Take care of the really small primes 2 & 3 */ + if (BN_is_word(a, 2) || BN_is_word(a, 3)) + return 1; + + /* Check odd and bigger than 1 */ + if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0) return 0; if (checks == BN_prime_checks) checks = BN_prime_checks_for_size(BN_num_bits(a)); /* first look for small factors */ - if (!BN_is_odd(a)) - /* a is even => a is prime if and only if a == 2 */ - return BN_is_word(a, 2); if (do_trial_division) { - for (i = 1; i < NUMPRIMES; i++) - if (BN_mod_word(a, primes[i]) == 0) - return 0; + for (i = 1; i < NUMPRIMES; i++) { + BN_ULONG mod = BN_mod_word(a, primes[i]); + if (mod == (BN_ULONG)-1) + goto err; + if (mod == 0) + return BN_is_word(a, primes[i]); + } if (!BN_GENCB_call(cb, 1, -1)) goto err; } @@ -278,20 +188,18 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_CTX_start(ctx); A1 = BN_CTX_get(ctx); + A3 = BN_CTX_get(ctx); A1_odd = BN_CTX_get(ctx); check = BN_CTX_get(ctx); if (check == NULL) goto err; /* compute A1 := a - 1 */ - if (!BN_copy(A1, a)) + if (!BN_copy(A1, a) || !BN_sub_word(A1, 1)) goto err; - if (!BN_sub_word(A1, 1)) + /* compute A3 := a - 3 */ + if (!BN_copy(A3, a) || !BN_sub_word(A3, 3)) goto err; - if (BN_is_zero(A1)) { - ret = 0; - goto err; - } /* write A1 as A1_odd * 2^k */ k = 1; @@ -308,11 +216,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, goto err; for (i = 0; i < checks; i++) { - if (!BN_pseudo_rand_range(check, A1)) + /* 1 < check < a-1 */ + if (!BN_priv_rand_range(check, A3) || !BN_add_word(check, 2)) goto err; - if (!BN_add_word(check, 1)) - goto err; - /* now 1 <= check < a */ j = witness(check, a, A1, A1_odd, k, ctx, mont); if (j == -1) @@ -331,10 +237,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, if (ctx_passed == NULL) BN_CTX_free(ctx); } - if (mont != NULL) - BN_MONT_CTX_free(mont); + BN_MONT_CTX_free(mont); - return (ret); + return ret; } static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, @@ -364,41 +269,91 @@ static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, return 1; } -static int probable_prime(BIGNUM *rnd, int bits) +static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods) { int i; - prime_t mods[NUMPRIMES]; - BN_ULONG delta, maxdelta; + BN_ULONG delta; + BN_ULONG maxdelta = BN_MASK2 - primes[NUMPRIMES - 1]; + char is_single_word = bits <= BN_BITS2; again: - if (!BN_rand(rnd, bits, 1, 1)) - return (0); - /* we now have a random number 'rand' to test. */ - for (i = 1; i < NUMPRIMES; i++) - mods[i] = (prime_t) BN_mod_word(rnd, (BN_ULONG)primes[i]); - maxdelta = BN_MASK2 - primes[NUMPRIMES - 1]; + /* TODO: Not all primes are private */ + if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD)) + return 0; + /* we now have a random number 'rnd' to test. */ + for (i = 1; i < NUMPRIMES; i++) { + BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]); + if (mod == (BN_ULONG)-1) + return 0; + mods[i] = (prime_t) mod; + } + /* + * If bits is so small that it fits into a single word then we + * additionally don't want to exceed that many bits. + */ + if (is_single_word) { + BN_ULONG size_limit; + + if (bits == BN_BITS2) { + /* + * Shifting by this much has undefined behaviour so we do it a + * different way + */ + size_limit = ~((BN_ULONG)0) - BN_get_word(rnd); + } else { + size_limit = (((BN_ULONG)1) << bits) - BN_get_word(rnd) - 1; + } + if (size_limit < maxdelta) + maxdelta = size_limit; + } delta = 0; - loop:for (i = 1; i < NUMPRIMES; i++) { - /* - * check that rnd is not a prime and also that gcd(rnd-1,primes) == 1 - * (except for 2) + loop: + if (is_single_word) { + BN_ULONG rnd_word = BN_get_word(rnd); + + /*- + * In the case that the candidate prime is a single word then + * we check that: + * 1) It's greater than primes[i] because we shouldn't reject + * 3 as being a prime number because it's a multiple of + * three. + * 2) That it's not a multiple of a known prime. We don't + * check that rnd-1 is also coprime to all the known + * primes because there aren't many small primes where + * that's true. */ - if (((mods[i] + delta) % primes[i]) <= 1) { - delta += 2; - if (delta > maxdelta) - goto again; - goto loop; + for (i = 1; i < NUMPRIMES && primes[i] < rnd_word; i++) { + if ((mods[i] + delta) % primes[i] == 0) { + delta += 2; + if (delta > maxdelta) + goto again; + goto loop; + } + } + } else { + for (i = 1; i < NUMPRIMES; i++) { + /* + * check that rnd is not a prime and also that gcd(rnd-1,primes) + * == 1 (except for 2) + */ + if (((mods[i] + delta) % primes[i]) <= 1) { + delta += 2; + if (delta > maxdelta) + goto again; + goto loop; + } } } if (!BN_add_word(rnd, delta)) - return (0); + return 0; + if (BN_num_bits(rnd) != bits) + goto again; bn_check_top(rnd); - return (1); + return 1; } -static int probable_prime_dh(BIGNUM *rnd, int bits, - const BIGNUM *add, const BIGNUM *rem, - BN_CTX *ctx) +int bn_probable_prime_dh(BIGNUM *rnd, int bits, + const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx) { int i, ret = 0; BIGNUM *t1; @@ -407,7 +362,7 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, if ((t1 = BN_CTX_get(ctx)) == NULL) goto err; - if (!BN_rand(rnd, bits, 0, 1)) + if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD)) goto err; /* we need ((rnd-rem) % add) == 0 */ @@ -426,19 +381,24 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, /* we now have a random number 'rand' to test. */ - loop:for (i = 1; i < NUMPRIMES; i++) { + loop: + for (i = 1; i < NUMPRIMES; i++) { /* check that rnd is a prime */ - if (BN_mod_word(rnd, (BN_ULONG)primes[i]) <= 1) { + BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]); + if (mod == (BN_ULONG)-1) + goto err; + if (mod <= 1) { if (!BN_add(rnd, rnd, add)) goto err; goto loop; } } ret = 1; + err: BN_CTX_end(ctx); bn_check_top(rnd); - return (ret); + return ret; } static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, @@ -458,7 +418,7 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, if (!BN_rshift1(qadd, padd)) goto err; - if (!BN_rand(q, bits, 0, 1)) + if (!BN_rand(q, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD)) goto err; /* we need ((rnd-rem) % add) == 0 */ @@ -482,13 +442,17 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, if (!BN_add_word(p, 1)) goto err; - loop:for (i = 1; i < NUMPRIMES; i++) { + loop: + for (i = 1; i < NUMPRIMES; i++) { /* check that p and q are prime */ /* * check that for p and q gcd(p-1,primes) == 1 (except for 2) */ - if ((BN_mod_word(p, (BN_ULONG)primes[i]) == 0) || - (BN_mod_word(q, (BN_ULONG)primes[i]) == 0)) { + BN_ULONG pmod = BN_mod_word(p, (BN_ULONG)primes[i]); + BN_ULONG qmod = BN_mod_word(q, (BN_ULONG)primes[i]); + if (pmod == (BN_ULONG)-1 || qmod == (BN_ULONG)-1) + goto err; + if (pmod == 0 || qmod == 0) { if (!BN_add(p, p, padd)) goto err; if (!BN_add(q, q, qadd)) @@ -497,8 +461,9 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, } } ret = 1; + err: BN_CTX_end(ctx); bn_check_top(p); - return (ret); + return ret; } diff --git a/crypto/bn/bn_prime.h b/crypto/bn/bn_prime.h index 489af8b424ca..a64c9630f3b0 100644 --- a/crypto/bn/bn_prime.h +++ b/crypto/bn/bn_prime.h @@ -1,326 +1,273 @@ -/* Auto generated by bn_prime.pl */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * WARNING: do not edit! + * Generated by crypto/bn/bn_prime.pl * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. + * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#ifndef EIGHT_BIT -# define NUMPRIMES 2048 typedef unsigned short prime_t; -#else -# define NUMPRIMES 54 -typedef unsigned char prime_t; -#endif -static const prime_t primes[NUMPRIMES] = { - 2, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, - 59, 61, 67, 71, 73, 79, 83, 89, - 97, 101, 103, 107, 109, 113, 127, 131, - 137, 139, 149, 151, 157, 163, 167, 173, - 179, 181, 191, 193, 197, 199, 211, 223, - 227, 229, 233, 239, 241, 251, -#ifndef EIGHT_BIT - 257, 263, - 269, 271, 277, 281, 283, 293, 307, 311, - 313, 317, 331, 337, 347, 349, 353, 359, - 367, 373, 379, 383, 389, 397, 401, 409, - 419, 421, 431, 433, 439, 443, 449, 457, - 461, 463, 467, 479, 487, 491, 499, 503, - 509, 521, 523, 541, 547, 557, 563, 569, - 571, 577, 587, 593, 599, 601, 607, 613, - 617, 619, 631, 641, 643, 647, 653, 659, - 661, 673, 677, 683, 691, 701, 709, 719, - 727, 733, 739, 743, 751, 757, 761, 769, - 773, 787, 797, 809, 811, 821, 823, 827, - 829, 839, 853, 857, 859, 863, 877, 881, - 883, 887, 907, 911, 919, 929, 937, 941, - 947, 953, 967, 971, 977, 983, 991, 997, - 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, - 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, - 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, - 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, - 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, - 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, - 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, - 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, - 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, - 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, - 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, - 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, - 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, - 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, - 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, - 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, - 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, - 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, - 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, - 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, - 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, - 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, - 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, - 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, - 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, - 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, - 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, - 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, - 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, - 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, - 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, - 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, - 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, - 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, - 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, - 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, - 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, - 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, - 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, - 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, - 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, - 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, - 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, - 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, - 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, - 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, - 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, - 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, - 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, - 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, - 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, - 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, - 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, - 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, - 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, - 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, - 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, - 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, - 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, - 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, - 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, - 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, - 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, - 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, - 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, - 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, - 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, - 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, - 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, - 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, - 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, - 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, - 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, - 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, - 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, - 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, - 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, - 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, - 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, - 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, - 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, - 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, - 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, - 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, - 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, - 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, - 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, - 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, - 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, - 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, - 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, - 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, - 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, - 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, - 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, - 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, - 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, - 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, - 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, - 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, - 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, - 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, - 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, - 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, - 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, - 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, - 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, - 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, - 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, - 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, - 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, - 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, - 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, - 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, - 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, - 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, - 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, - 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, - 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, - 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, - 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, - 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, - 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, - 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, - 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, - 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, - 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, - 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, - 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, - 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, - 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, - 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, - 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, - 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, - 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, - 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, - 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, - 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, - 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, - 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, - 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, - 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, - 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, - 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, - 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, - 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, - 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, - 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, - 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, - 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, - 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, - 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, - 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, - 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, - 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, - 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, - 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, - 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, - 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, - 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, - 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, - 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, - 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, - 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, - 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, - 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, - 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, - 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, - 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, - 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, - 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, - 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, - 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, - 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, - 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, - 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, - 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, - 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, - 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, - 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, - 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, - 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, - 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, - 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, - 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, - 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, - 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, - 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, - 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, - 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, - 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, - 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, - 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, - 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, - 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, - 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, - 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, - 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, - 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, - 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, - 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, - 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, - 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, - 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, - 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, - 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, - 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, - 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, - 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, - 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, - 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, - 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, - 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, - 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, - 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, - 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, - 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, - 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, - 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, - 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, - 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, - 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, - 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, - 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, - 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, - 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, - 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, - 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, - 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, - 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, - 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, - 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, - 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, - 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, - 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, -#endif +# define NUMPRIMES 2048 + +static const prime_t primes[2048] = { + 2, 3, 5, 7, 11, 13, 17, 19, + 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, + 97, 101, 103, 107, 109, 113, 127, 131, + 137, 139, 149, 151, 157, 163, 167, 173, + 179, 181, 191, 193, 197, 199, 211, 223, + 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, + 313, 317, 331, 337, 347, 349, 353, 359, + 367, 373, 379, 383, 389, 397, 401, 409, + 419, 421, 431, 433, 439, 443, 449, 457, + 461, 463, 467, 479, 487, 491, 499, 503, + 509, 521, 523, 541, 547, 557, 563, 569, + 571, 577, 587, 593, 599, 601, 607, 613, + 617, 619, 631, 641, 643, 647, 653, 659, + 661, 673, 677, 683, 691, 701, 709, 719, + 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, + 829, 839, 853, 857, 859, 863, 877, 881, + 883, 887, 907, 911, 919, 929, 937, 941, + 947, 953, 967, 971, 977, 983, 991, 997, + 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, + 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, + 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, + 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, + 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, + 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, + 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, + 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, + 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, + 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, + 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, + 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, + 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, + 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, + 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, + 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, + 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, + 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, + 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, + 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, + 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, + 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, + 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, + 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, + 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, + 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, + 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, + 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, + 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, + 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, + 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, + 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, + 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, + 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, + 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, + 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, + 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, + 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, + 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, + 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, + 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, + 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, + 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, + 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, + 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, + 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, + 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, + 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, + 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, + 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, + 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, + 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, + 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, + 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, + 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, + 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, + 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, + 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, + 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, + 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, + 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, + 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, + 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, + 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, + 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, + 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, + 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, + 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, + 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, + 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, + 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, + 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, + 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, + 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, + 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, + 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, + 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, + 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, + 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, + 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, + 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, + 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, + 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, + 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, + 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, + 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, + 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, + 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, + 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, + 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, + 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, + 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, + 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, + 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, + 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, + 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, + 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, + 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, + 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, + 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, + 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, + 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, + 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, + 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, + 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, + 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, + 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, + 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, + 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, + 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, + 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, + 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, + 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, + 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, + 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, + 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, + 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, + 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, + 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, + 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, + 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, + 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, + 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, + 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, + 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, + 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, + 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, + 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, + 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, + 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, + 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, + 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, + 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, + 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, + 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, + 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, + 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, + 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, + 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, + 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, + 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, + 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, + 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, + 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, + 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, + 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, + 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, + 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, + 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, + 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, + 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, + 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, + 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, + 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, + 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, + 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, + 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, + 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, + 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, + 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, + 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, + 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, + 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, + 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, + 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, + 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, + 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, + 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, + 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, + 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, + 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, + 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, + 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, + 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, + 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, + 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, + 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, + 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, + 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, + 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, + 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, + 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, + 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, + 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, + 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, + 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, + 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, + 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, + 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, + 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, + 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, + 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, + 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, + 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, + 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, + 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, + 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, + 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, + 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, + 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, + 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, + 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, + 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, + 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, + 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, + 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, + 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, + 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, + 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, + 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, + 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, + 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, + 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, + 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, + 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, + 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, + 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, + 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, + 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, + 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, + 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, + 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, + 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, + 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, + 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, + 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, + 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, + 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, + 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, + 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, + 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, + 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, + 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, + 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, + 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, }; diff --git a/crypto/bn/bn_prime.pl b/crypto/bn/bn_prime.pl index 6bede65e1dd4..eeca475b9366 100644 --- a/crypto/bn/bn_prime.pl +++ b/crypto/bn/bn_prime.pl @@ -1,119 +1,48 @@ -#!/usr/local/bin/perl -# bn_prime.pl - -$num=2048; -$num=$ARGV[0] if ($#ARGV >= 0); - -push(@primes,2); -$p=1; -loop: while ($#primes < $num-1) - { - $p+=2; - $s=int(sqrt($p)); - - for ($i=0; defined($primes[$i]) && $primes[$i]<=$s; $i++) - { - next loop if (($p%$primes[$i]) == 0); - } - push(@primes,$p); - } - -# print <<"EOF"; -# /* Auto generated by bn_prime.pl */ -# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au). -# * All rights reserved. -# * Copyright remains Eric Young's, and as such any Copyright notices in -# * the code are not to be removed. -# * See the COPYRIGHT file in the SSLeay distribution for more details. -# */ +#! /usr/bin/env perl +# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. # -# EOF - -print <<\EOF; -/* Auto generated by bn_prime.pl */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# Output year depends on the year of the script. +my $YEAR = [localtime([stat($0)]->[9])]->[5] + 1900; +print <<"EOF"; +/* + * WARNING: do not edit! + * Generated by crypto/bn/bn_prime.pl * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved. * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ EOF -for ($i=0; $i <= $#primes; $i++) - { - if ($primes[$i] > 256) - { - $eight=$i; - last; - } - } - -printf "#ifndef EIGHT_BIT\n"; -printf "# define NUMPRIMES %d\n",$num; -printf "typedef unsigned short prime_t;\n"; -printf "#else\n"; -printf "# define NUMPRIMES %d\n",$eight; -printf "typedef unsigned char prime_t;\n"; -printf "#endif\n"; -print "static const prime_t primes[NUMPRIMES] = {"; -$init=0; -for ($i=0; $i <= $#primes; $i++) - { - printf "\n#ifndef EIGHT_BIT\n " if ($primes[$i] > 256) && !($init++); - printf("\n ") if ($i%8) == 0; - printf(" %5d,",$primes[$i]); - } -print "\n#endif\n};\n"; - +my $num = shift || 2048; +my @primes = ( 2 ); +my $p = 1; +loop: while ($#primes < $num-1) { + $p += 2; + my $s = int(sqrt($p)); + + for (my $i = 0; defined($primes[$i]) && $primes[$i] <= $s; $i++) { + next loop if ($p % $primes[$i]) == 0; + } + push(@primes, $p); +} + +print "typedef unsigned short prime_t;\n"; +printf "# define NUMPRIMES %d\n\n", $num; + +printf "static const prime_t primes[%d] = {", $num; +for (my $i = 0; $i <= $#primes; $i++) { + printf "\n " if ($i % 8) == 0; + printf " %5d,", $primes[$i]; +} +print "\n};\n"; diff --git a/crypto/bn/bn_print.c b/crypto/bn/bn_print.c index c0b029dad6cd..1853269d903d 100644 --- a/crypto/bn/bn_print.c +++ b/crypto/bn/bn_print.c @@ -1,65 +1,16 @@ -/* crypto/bn/bn_print.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> -#include <ctype.h> +#include "internal/ctype.h" #include <limits.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include <openssl/buffer.h> #include "bn_lcl.h" @@ -81,27 +32,27 @@ char *BN_bn2hex(const BIGNUM *a) } p = buf; if (a->neg) - *(p++) = '-'; + *p++ = '-'; for (i = a->top - 1; i >= 0; i--) { for (j = BN_BITS2 - 8; j >= 0; j -= 8) { /* strip leading zeros */ - v = ((int)(a->d[i] >> (long)j)) & 0xff; - if (z || (v != 0)) { - *(p++) = Hex[v >> 4]; - *(p++) = Hex[v & 0x0f]; + v = (int)((a->d[i] >> j) & 0xff); + if (z || v != 0) { + *p++ = Hex[v >> 4]; + *p++ = Hex[v & 0x0f]; z = 1; } } } *p = '\0'; err: - return (buf); + return buf; } /* Must 'OPENSSL_free' the returned data */ char *BN_bn2dec(const BIGNUM *a) { - int i = 0, num, ok = 0; + int i = 0, num, ok = 0, n, tbytes; char *buf = NULL; char *p; BIGNUM *t = NULL; @@ -111,27 +62,27 @@ char *BN_bn2dec(const BIGNUM *a) /*- * get an upper bound for the length of the decimal integer * num <= (BN_num_bits(a) + 1) * log(2) - * <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error) - * <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1 + * <= 3 * BN_num_bits(a) * 0.101 + log(2) + 1 (rounding error) + * <= 3 * BN_num_bits(a) / 10 + 3 * BN_num_bits / 1000 + 1 + 1 */ i = BN_num_bits(a) * 3; num = (i / 10 + i / 1000 + 1) + 1; + tbytes = num + 3; /* negative and terminator and one spare? */ bn_data_num = num / BN_DEC_NUM + 1; bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG)); - buf = OPENSSL_malloc(num + 3); - if ((buf == NULL) || (bn_data == NULL)) { + buf = OPENSSL_malloc(tbytes); + if (buf == NULL || bn_data == NULL) { BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE); goto err; } if ((t = BN_dup(a)) == NULL) goto err; -#define BUF_REMAIN (num+3 - (size_t)(p - buf)) p = buf; lp = bn_data; if (BN_is_zero(t)) { - *(p++) = '0'; - *(p++) = '\0'; + *p++ = '0'; + *p++ = '\0'; } else { if (BN_is_negative(t)) *p++ = '-'; @@ -150,28 +101,26 @@ char *BN_bn2dec(const BIGNUM *a) * the last one needs truncation. The blocks need to be reversed in * order. */ - BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp); - while (*p) - p++; + n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp); + if (n < 0) + goto err; + p += n; while (lp != bn_data) { lp--; - BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp); - while (*p) - p++; + n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp); + if (n < 0) + goto err; + p += n; } } ok = 1; err: - if (bn_data != NULL) - OPENSSL_free(bn_data); - if (t != NULL) - BN_free(t); - if (!ok && buf) { - OPENSSL_free(buf); - buf = NULL; - } - - return (buf); + OPENSSL_free(bn_data); + BN_free(t); + if (ok) + return buf; + OPENSSL_free(buf); + return NULL; } int BN_hex2bn(BIGNUM **bn, const char *a) @@ -181,28 +130,28 @@ int BN_hex2bn(BIGNUM **bn, const char *a) int neg = 0, h, m, i, j, k, c; int num; - if ((a == NULL) || (*a == '\0')) - return (0); + if (a == NULL || *a == '\0') + return 0; if (*a == '-') { neg = 1; a++; } - for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++) + for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++) continue; - if (i > INT_MAX/4) + if (i == 0 || i > INT_MAX / 4) goto err; num = i + neg; if (bn == NULL) - return (num); + return num; /* a is the start of the hex digits, and it is 'i' long */ if (*bn == NULL) { if ((ret = BN_new()) == NULL) - return (0); + return 0; } else { ret = *bn; BN_zero(ret); @@ -216,17 +165,12 @@ int BN_hex2bn(BIGNUM **bn, const char *a) m = 0; h = 0; while (j > 0) { - m = ((BN_BYTES * 2) <= j) ? (BN_BYTES * 2) : j; + m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j; l = 0; for (;;) { c = a[j - m]; - if ((c >= '0') && (c <= '9')) - k = c - '0'; - else if ((c >= 'a') && (c <= 'f')) - k = c - 'a' + 10; - else if ((c >= 'A') && (c <= 'F')) - k = c - 'A' + 10; - else + k = OPENSSL_hexchar2int(c); + if (k < 0) k = 0; /* paranoia */ l = (l << 4) | k; @@ -235,7 +179,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a) break; } } - j -= (BN_BYTES * 2); + j -= BN_BYTES * 2; } ret->top = h; bn_correct_top(ret); @@ -245,11 +189,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a) /* Don't set the negative flag if it's zero. */ if (ret->top != 0) ret->neg = neg; - return (num); + return num; err: if (*bn == NULL) BN_free(ret); - return (0); + return 0; } int BN_dec2bn(BIGNUM **bn, const char *a) @@ -259,22 +203,22 @@ int BN_dec2bn(BIGNUM **bn, const char *a) int neg = 0, i, j; int num; - if ((a == NULL) || (*a == '\0')) - return (0); + if (a == NULL || *a == '\0') + return 0; if (*a == '-') { neg = 1; a++; } - for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++) + for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++) continue; - if (i > INT_MAX/4) + if (i == 0 || i > INT_MAX / 4) goto err; num = i + neg; if (bn == NULL) - return (num); + return num; /* * a is the start of the digits, and it is 'i' long. We chop it into @@ -282,7 +226,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) */ if (*bn == NULL) { if ((ret = BN_new()) == NULL) - return (0); + return 0; } else { ret = *bn; BN_zero(ret); @@ -292,7 +236,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) if (bn_expand(ret, i * 4) == NULL) goto err; - j = BN_DEC_NUM - (i % BN_DEC_NUM); + j = BN_DEC_NUM - i % BN_DEC_NUM; if (j == BN_DEC_NUM) j = 0; l = 0; @@ -301,8 +245,9 @@ int BN_dec2bn(BIGNUM **bn, const char *a) l += *a - '0'; a++; if (++j == BN_DEC_NUM) { - BN_mul_word(ret, BN_DEC_CONV); - BN_add_word(ret, l); + if (!BN_mul_word(ret, BN_DEC_CONV) + || !BN_add_word(ret, l)) + goto err; l = 0; j = 0; } @@ -314,11 +259,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a) /* Don't set the negative flag if it's zero. */ if (ret->top != 0) ret->neg = neg; - return (num); + return num; err: if (*bn == NULL) BN_free(ret); - return (0); + return 0; } int BN_asc2bn(BIGNUM **bn, const char *a) @@ -341,19 +286,18 @@ int BN_asc2bn(BIGNUM **bn, const char *a) return 1; } -#ifndef OPENSSL_NO_BIO -# ifndef OPENSSL_NO_FP_API +# ifndef OPENSSL_NO_STDIO int BN_print_fp(FILE *fp, const BIGNUM *a) { BIO *b; int ret; if ((b = BIO_new(BIO_s_file())) == NULL) - return (0); + return 0; BIO_set_fp(b, fp, BIO_NOCLOSE); ret = BN_print(b, a); BIO_free(b); - return (ret); + return ret; } # endif @@ -362,16 +306,16 @@ int BN_print(BIO *bp, const BIGNUM *a) int i, j, v, z = 0; int ret = 0; - if ((a->neg) && (BIO_write(bp, "-", 1) != 1)) + if ((a->neg) && BIO_write(bp, "-", 1) != 1) goto end; - if (BN_is_zero(a) && (BIO_write(bp, "0", 1) != 1)) + if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1) goto end; for (i = a->top - 1; i >= 0; i--) { for (j = BN_BITS2 - 4; j >= 0; j -= 4) { /* strip leading zeros */ - v = ((int)(a->d[i] >> (long)j)) & 0x0f; - if (z || (v != 0)) { - if (BIO_write(bp, &(Hex[v]), 1) != 1) + v = (int)((a->d[i] >> j) & 0x0f); + if (z || v != 0) { + if (BIO_write(bp, &Hex[v], 1) != 1) goto end; z = 1; } @@ -379,9 +323,8 @@ int BN_print(BIO *bp, const BIGNUM *a) } ret = 1; end: - return (ret); + return ret; } -#endif char *BN_options(void) { @@ -391,12 +334,12 @@ char *BN_options(void) if (!init) { init++; #ifdef BN_LLONG - BIO_snprintf(data, sizeof(data), "bn(%d,%d)", - (int)sizeof(BN_ULLONG) * 8, (int)sizeof(BN_ULONG) * 8); + BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)", + sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8); #else - BIO_snprintf(data, sizeof(data), "bn(%d,%d)", - (int)sizeof(BN_ULONG) * 8, (int)sizeof(BN_ULONG) * 8); + BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)", + sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8); #endif } - return (data); + return data; } diff --git a/crypto/bn/bn_rand.c b/crypto/bn/bn_rand.c index 60d3f2260ba1..c0d1a32292ba 100644 --- a/crypto/bn/bn_rand.c +++ b/crypto/bn/bn_rand.c @@ -1,128 +1,30 @@ -/* crypto/bn/bn_rand.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> #include <time.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" #include <openssl/rand.h> +#include <openssl/sha.h> -static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) +typedef enum bnrand_flag_e { + NORMAL, TESTING, PRIVATE +} BNRAND_FLAG; + +static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom) { unsigned char *buf = NULL; - int ret = 0, bit, bytes, mask; - time_t tim; + int b, ret = 0, bit, bytes, mask; if (bits == 0) { - if (top != -1 || bottom != 0) + if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY) goto toosmall; BN_zero(rnd); return 1; @@ -134,22 +36,18 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) bit = (bits - 1) % 8; mask = 0xff << (bit + 1); - buf = (unsigned char *)OPENSSL_malloc(bytes); + buf = OPENSSL_malloc(bytes); if (buf == NULL) { BNerr(BN_F_BNRAND, ERR_R_MALLOC_FAILURE); goto err; } /* make a random number and set the top and bottom bits */ - time(&tim); - RAND_add(&tim, sizeof(tim), 0.0); - - /* We ignore the value of pseudorand and always call RAND_bytes */ - if (RAND_bytes(buf, bytes) <= 0) + b = flag == NORMAL ? RAND_bytes(buf, bytes) : RAND_priv_bytes(buf, bytes); + if (b <= 0) goto err; -#if 1 - if (pseudorand == 2) { + if (flag == TESTING) { /* * generate patterns that are more likely to trigger BN library bugs */ @@ -157,7 +55,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) unsigned char c; for (i = 0; i < bytes; i++) { - if (RAND_pseudo_bytes(&c, 1) < 0) + if (RAND_bytes(&c, 1) <= 0) goto err; if (c >= 128 && i > 0) buf[i] = buf[i - 1]; @@ -167,7 +65,6 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) buf[i] = 255; } } -#endif if (top >= 0) { if (top) { @@ -188,12 +85,9 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) goto err; ret = 1; err: - if (buf != NULL) { - OPENSSL_cleanse(buf, bytes); - OPENSSL_free(buf); - } + OPENSSL_clear_free(buf, bytes); bn_check_top(rnd); - return (ret); + return ret; toosmall: BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL); @@ -202,31 +96,27 @@ toosmall: int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(0, rnd, bits, top, bottom); + return bnrand(NORMAL, rnd, bits, top, bottom); } -int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) +int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(1, rnd, bits, top, bottom); + return bnrand(TESTING, rnd, bits, top, bottom); } -#if 1 -int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom) +int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(2, rnd, bits, top, bottom); + return bnrand(PRIVATE, rnd, bits, top, bottom); } -#endif /* random number r: 0 <= r < range */ -static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) +static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range) { - int (*bn_rand) (BIGNUM *, int, int, int) = - pseudo ? BN_pseudo_rand : BN_rand; int n; int count = 100; if (range->neg || BN_is_zero(range)) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE); + BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE); return 0; } @@ -242,8 +132,9 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) * than range */ do { - if (!bn_rand(r, n + 1, -1, 0)) + if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) return 0; + /* * If r < 3*range, use r := r MOD range (which is either r, r - * range, or r - 2*range). Otherwise, iterate once more. Since @@ -259,7 +150,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) } if (!--count) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS); return 0; } @@ -268,11 +159,11 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) } else { do { /* range = 11..._2 or range = 101..._2 */ - if (!bn_rand(r, n, -1, 0)) + if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) return 0; if (!--count) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS); return 0; } } @@ -285,10 +176,93 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) int BN_rand_range(BIGNUM *r, const BIGNUM *range) { - return bn_rand_range(0, r, range); + return bnrand_range(NORMAL, r, range); +} + +int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range) +{ + return bnrand_range(PRIVATE, r, range); +} + +int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) +{ + return BN_rand(rnd, bits, top, bottom); } int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) { - return bn_rand_range(1, r, range); + return BN_rand_range(r, range); +} + +/* + * BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike + * BN_rand_range, it also includes the contents of |priv| and |message| in + * the generation so that an RNG failure isn't fatal as long as |priv| + * remains secret. This is intended for use in DSA and ECDSA where an RNG + * weakness leads directly to private key exposure unless this function is + * used. + */ +int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, + const BIGNUM *priv, const unsigned char *message, + size_t message_len, BN_CTX *ctx) +{ + SHA512_CTX sha; + /* + * We use 512 bits of random data per iteration to ensure that we have at + * least |range| bits of randomness. + */ + unsigned char random_bytes[64]; + unsigned char digest[SHA512_DIGEST_LENGTH]; + unsigned done, todo; + /* We generate |range|+8 bytes of random output. */ + const unsigned num_k_bytes = BN_num_bytes(range) + 8; + unsigned char private_bytes[96]; + unsigned char *k_bytes; + int ret = 0; + + k_bytes = OPENSSL_malloc(num_k_bytes); + if (k_bytes == NULL) + goto err; + + /* We copy |priv| into a local buffer to avoid exposing its length. */ + todo = sizeof(priv->d[0]) * priv->top; + if (todo > sizeof(private_bytes)) { + /* + * No reasonable DSA or ECDSA key should have a private key this + * large and we don't handle this case in order to avoid leaking the + * length of the private key. + */ + BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE); + goto err; + } + memcpy(private_bytes, priv->d, todo); + memset(private_bytes + todo, 0, sizeof(private_bytes) - todo); + + for (done = 0; done < num_k_bytes;) { + if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1) + goto err; + SHA512_Init(&sha); + SHA512_Update(&sha, &done, sizeof(done)); + SHA512_Update(&sha, private_bytes, sizeof(private_bytes)); + SHA512_Update(&sha, message, message_len); + SHA512_Update(&sha, random_bytes, sizeof(random_bytes)); + SHA512_Final(digest, &sha); + + todo = num_k_bytes - done; + if (todo > SHA512_DIGEST_LENGTH) + todo = SHA512_DIGEST_LENGTH; + memcpy(k_bytes + done, digest, todo); + done += todo; + } + + if (!BN_bin2bn(k_bytes, num_k_bytes, out)) + goto err; + if (BN_mod(out, out, range, ctx) != 1) + goto err; + ret = 1; + + err: + OPENSSL_free(k_bytes); + OPENSSL_cleanse(private_bytes, sizeof(private_bytes)); + return ret; } diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c index f047040efe03..9ab767f42fe9 100644 --- a/crypto/bn/bn_recp.c +++ b/crypto/bn/bn_recp.c @@ -1,93 +1,43 @@ -/* crypto/bn/bn_recp.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" void BN_RECP_CTX_init(BN_RECP_CTX *recp) { - BN_init(&(recp->N)); - BN_init(&(recp->Nr)); - recp->num_bits = 0; - recp->shift = 0; - recp->flags = 0; + memset(recp, 0, sizeof(*recp)); + bn_init(&(recp->N)); + bn_init(&(recp->Nr)); } BN_RECP_CTX *BN_RECP_CTX_new(void) { BN_RECP_CTX *ret; - if ((ret = (BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL) - return (NULL); + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { + BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } - BN_RECP_CTX_init(ret); + bn_init(&(ret->N)); + bn_init(&(ret->Nr)); ret->flags = BN_FLG_MALLOCED; - return (ret); + return ret; } void BN_RECP_CTX_free(BN_RECP_CTX *recp) { if (recp == NULL) return; - - BN_free(&(recp->N)); - BN_free(&(recp->Nr)); + BN_free(&recp->N); + BN_free(&recp->Nr); if (recp->flags & BN_FLG_MALLOCED) OPENSSL_free(recp); } @@ -99,7 +49,7 @@ int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) BN_zero(&(recp->Nr)); recp->num_bits = BN_num_bits(d); recp->shift = 0; - return (1); + return 1; } int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, @@ -128,7 +78,7 @@ int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, @@ -138,17 +88,11 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BIGNUM *a, *b, *d, *r; BN_CTX_start(ctx); + d = (dv != NULL) ? dv : BN_CTX_get(ctx); + r = (rem != NULL) ? rem : BN_CTX_get(ctx); a = BN_CTX_get(ctx); b = BN_CTX_get(ctx); - if (dv != NULL) - d = dv; - else - d = BN_CTX_get(ctx); - if (rem != NULL) - r = rem; - else - r = BN_CTX_get(ctx); - if (a == NULL || b == NULL || d == NULL || r == NULL) + if (b == NULL) goto err; if (BN_ucmp(m, &(recp->N)) < 0) { @@ -158,7 +102,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, return 0; } BN_CTX_end(ctx); - return (1); + return 1; } /* @@ -199,7 +143,6 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, goto err; r->neg = 0; -#if 1 j = 0; while (BN_ucmp(r, &(recp->N)) >= 0) { if (j++ > 2) { @@ -211,7 +154,6 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, if (!BN_add_word(d, 1)) goto err; } -#endif r->neg = BN_is_zero(r) ? 0 : m->neg; d->neg = m->neg ^ recp->N.neg; @@ -220,7 +162,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_CTX_end(ctx); bn_check_top(dv); bn_check_top(rem); - return (ret); + return ret; } /* @@ -248,5 +190,5 @@ int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) err: bn_check_top(r); BN_CTX_end(ctx); - return (ret); + return ret; } diff --git a/crypto/bn/bn_shift.c b/crypto/bn/bn_shift.c index 9673d9a30633..15d4b321ba26 100644 --- a/crypto/bn/bn_shift.c +++ b/crypto/bn/bn_shift.c @@ -1,63 +1,13 @@ -/* crypto/bn/bn_shift.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" int BN_lshift1(BIGNUM *r, const BIGNUM *a) @@ -71,11 +21,11 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) if (r != a) { r->neg = a->neg; if (bn_wexpand(r, a->top + 1) == NULL) - return (0); + return 0; r->top = a->top; } else { if (bn_wexpand(r, a->top + 1) == NULL) - return (0); + return 0; } ap = a->d; rp = r->d; @@ -90,7 +40,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) r->top++; } bn_check_top(r); - return (1); + return 1; } int BN_rshift1(BIGNUM *r, const BIGNUM *a) @@ -103,14 +53,14 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) if (BN_is_zero(a)) { BN_zero(r); - return (1); + return 1; } i = a->top; ap = a->d; j = i - (ap[i - 1] == 1); if (a != r) { if (bn_wexpand(r, j) == NULL) - return (0); + return 0; r->neg = a->neg; } rp = r->d; @@ -124,8 +74,10 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) c = (t & 1) ? BN_TBIT : 0; } r->top = j; + if (!r->top) + r->neg = 0; /* don't allow negative zero */ bn_check_top(r); - return (1); + return 1; } int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) @@ -142,10 +94,10 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) return 0; } - r->neg = a->neg; nw = n / BN_BITS2; if (bn_wexpand(r, a->top + nw + 1) == NULL) - return (0); + return 0; + r->neg = a->neg; lb = n % BN_BITS2; rb = BN_BITS2 - lb; f = a->d; @@ -160,14 +112,11 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) t[nw + i + 1] |= (l >> rb) & BN_MASK2; t[nw + i] = (l << lb) & BN_MASK2; } - memset(t, 0, nw * sizeof(t[0])); - /* - * for (i=0; i<nw; i++) t[i]=0; - */ + memset(t, 0, sizeof(*t) * nw); r->top = a->top + nw + 1; bn_correct_top(r); bn_check_top(r); - return (1); + return 1; } int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) @@ -189,13 +138,13 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) lb = BN_BITS2 - rb; if (nw >= a->top || a->top == 0) { BN_zero(r); - return (1); + return 1; } i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2; if (r != a) { - r->neg = a->neg; if (bn_wexpand(r, i) == NULL) - return (0); + return 0; + r->neg = a->neg; } else { if (n == 0) return 1; /* or the copying loop will go berserk */ @@ -219,6 +168,8 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) if ((l = (l >> rb) & BN_MASK2)) *(t) = l; } + if (!r->top) + r->neg = 0; /* don't allow negative zero */ bn_check_top(r); - return (1); + return 1; } diff --git a/crypto/bn/bn_sqr.c b/crypto/bn/bn_sqr.c index 256d26e8dbec..0c0a590f0c6a 100644 --- a/crypto/bn/bn_sqr.c +++ b/crypto/bn/bn_sqr.c @@ -1,63 +1,13 @@ -/* crypto/bn/bn_sqr.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" /* r must not be a */ @@ -66,13 +16,20 @@ */ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) { + int ret = bn_sqr_fixed_top(r, a, ctx); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) +{ int max, al; int ret = 0; BIGNUM *tmp, *rr; -#ifdef BN_COUNT - fprintf(stderr, "BN_sqr %d * %d\n", a->top, a->top); -#endif bn_check_top(a); al = a->top; @@ -85,7 +42,7 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) BN_CTX_start(ctx); rr = (a != r) ? r : BN_CTX_get(ctx); tmp = BN_CTX_get(ctx); - if (!rr || !tmp) + if (rr == NULL || tmp == NULL) goto err; max = 2 * al; /* Non-zero (from above) */ @@ -135,14 +92,8 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) } rr->neg = 0; - /* - * If the most-significant half of the top word of 'a' is zero, then the - * square of 'a' will max-1 words. - */ - if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) - rr->top = max - 1; - else - rr->top = max; + rr->top = max; + rr->flags |= BN_FLG_FIXED_TOP; if (r != rr && BN_copy(r, rr) == NULL) goto err; @@ -151,7 +102,7 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) bn_check_top(rr); bn_check_top(tmp); BN_CTX_end(ctx); - return (ret); + return ret; } /* tmp must have 2*n words */ @@ -208,9 +159,6 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) int zero, c1; BN_ULONG ln, lo, *p; -# ifdef BN_COUNT - fprintf(stderr, " bn_sqr_recursive %d * %d\n", n2, n2); -# endif if (n2 == 4) { # ifndef BN_SQR_COMBA bn_sqr_normal(r, a, 4, t); @@ -246,7 +194,7 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) if (!zero) bn_sqr_recursive(&(t[n2]), t, n, p); else - memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); + memset(&t[n2], 0, sizeof(*t) * n2); bn_sqr_recursive(r, a, n, p); bn_sqr_recursive(&(r[n2]), &(a[n]), n, p); diff --git a/crypto/bn/bn_sqrt.c b/crypto/bn/bn_sqrt.c index 232af99a216d..b97d8ca43ba2 100644 --- a/crypto/bn/bn_sqrt.c +++ b/crypto/bn/bn_sqrt.c @@ -1,63 +1,13 @@ -/* crypto/bn/bn_sqrt.c */ /* - * Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> and Bodo - * Moeller for the OpenSSL project. - */ -/* ==================================================================== - * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). + * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) @@ -89,7 +39,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME); - return (NULL); + return NULL; } if (BN_is_zero(a) || BN_is_one(a)) { @@ -229,7 +179,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) if (!BN_set_word(y, i)) goto end; } else { - if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) + if (!BN_priv_rand(y, BN_num_bits(p), 0, 0)) goto end; if (BN_ucmp(y, p) >= 0) { if (!(p->neg ? BN_add : BN_sub) (y, y, p)) @@ -398,9 +348,8 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) end: if (err) { - if (ret != NULL && ret != in) { + if (ret != in) BN_clear_free(ret); - } ret = NULL; } BN_CTX_end(ctx); diff --git a/crypto/bn/bn_srp.c b/crypto/bn/bn_srp.c new file mode 100644 index 000000000000..27b6ebe518ea --- /dev/null +++ b/crypto/bn/bn_srp.c @@ -0,0 +1,545 @@ +/* + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "bn_lcl.h" +#include "internal/nelem.h" + +#ifndef OPENSSL_NO_SRP + +#include <openssl/srp.h> +#include "internal/bn_srp.h" + +# if (BN_BYTES == 8) +# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +# define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64) +# elif defined(__arch64__) +# define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL) +# else +# define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL) +# endif +# elif (BN_BYTES == 4) +# define bn_pack4(a1,a2,a3,a4) ((a3##UL<<16)|a4##UL), ((a1##UL<<16)|a2##UL) +# else +# error "unsupported BN_BYTES" +# endif + +static const BN_ULONG bn_group_1024_value[] = { + bn_pack4(0x9FC6, 0x1D2F, 0xC0EB, 0x06E3), + bn_pack4(0xFD51, 0x38FE, 0x8376, 0x435B), + bn_pack4(0x2FD4, 0xCBF4, 0x976E, 0xAA9A), + bn_pack4(0x68ED, 0xBC3C, 0x0572, 0x6CC0), + bn_pack4(0xC529, 0xF566, 0x660E, 0x57EC), + bn_pack4(0x8255, 0x9B29, 0x7BCF, 0x1885), + bn_pack4(0xCE8E, 0xF4AD, 0x69B1, 0x5D49), + bn_pack4(0x5DC7, 0xD7B4, 0x6154, 0xD6B6), + bn_pack4(0x8E49, 0x5C1D, 0x6089, 0xDAD1), + bn_pack4(0xE0D5, 0xD8E2, 0x50B9, 0x8BE4), + bn_pack4(0x383B, 0x4813, 0xD692, 0xC6E0), + bn_pack4(0xD674, 0xDF74, 0x96EA, 0x81D3), + bn_pack4(0x9EA2, 0x314C, 0x9C25, 0x6576), + bn_pack4(0x6072, 0x6187, 0x75FF, 0x3C0B), + bn_pack4(0x9C33, 0xF80A, 0xFA8F, 0xC5E8), + bn_pack4(0xEEAF, 0x0AB9, 0xADB3, 0x8DD6) +}; + +const BIGNUM bn_group_1024 = { + (BN_ULONG *)bn_group_1024_value, + OSSL_NELEM(bn_group_1024_value), + OSSL_NELEM(bn_group_1024_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_1536_value[] = { + bn_pack4(0xCF76, 0xE3FE, 0xD135, 0xF9BB), + bn_pack4(0x1518, 0x0F93, 0x499A, 0x234D), + bn_pack4(0x8CE7, 0xA28C, 0x2442, 0xC6F3), + bn_pack4(0x5A02, 0x1FFF, 0x5E91, 0x479E), + bn_pack4(0x7F8A, 0x2FE9, 0xB8B5, 0x292E), + bn_pack4(0x837C, 0x264A, 0xE3A9, 0xBEB8), + bn_pack4(0xE442, 0x734A, 0xF7CC, 0xB7AE), + bn_pack4(0x6577, 0x2E43, 0x7D6C, 0x7F8C), + bn_pack4(0xDB2F, 0xD53D, 0x24B7, 0xC486), + bn_pack4(0x6EDF, 0x0195, 0x3934, 0x9627), + bn_pack4(0x158B, 0xFD3E, 0x2B9C, 0x8CF5), + bn_pack4(0x764E, 0x3F4B, 0x53DD, 0x9DA1), + bn_pack4(0x4754, 0x8381, 0xDBC5, 0xB1FC), + bn_pack4(0x9B60, 0x9E0B, 0xE3BA, 0xB63D), + bn_pack4(0x8134, 0xB1C8, 0xB979, 0x8914), + bn_pack4(0xDF02, 0x8A7C, 0xEC67, 0xF0D0), + bn_pack4(0x80B6, 0x55BB, 0x9A22, 0xE8DC), + bn_pack4(0x1558, 0x903B, 0xA0D0, 0xF843), + bn_pack4(0x51C6, 0xA94B, 0xE460, 0x7A29), + bn_pack4(0x5F4F, 0x5F55, 0x6E27, 0xCBDE), + bn_pack4(0xBEEE, 0xA961, 0x4B19, 0xCC4D), + bn_pack4(0xDBA5, 0x1DF4, 0x99AC, 0x4C80), + bn_pack4(0xB1F1, 0x2A86, 0x17A4, 0x7BBB), + bn_pack4(0x9DEF, 0x3CAF, 0xB939, 0x277A) +}; + +const BIGNUM bn_group_1536 = { + (BN_ULONG *)bn_group_1536_value, + OSSL_NELEM(bn_group_1536_value), + OSSL_NELEM(bn_group_1536_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_2048_value[] = { + bn_pack4(0x0FA7, 0x111F, 0x9E4A, 0xFF73), + bn_pack4(0x9B65, 0xE372, 0xFCD6, 0x8EF2), + bn_pack4(0x35DE, 0x236D, 0x525F, 0x5475), + bn_pack4(0x94B5, 0xC803, 0xD89F, 0x7AE4), + bn_pack4(0x71AE, 0x35F8, 0xE9DB, 0xFBB6), + bn_pack4(0x2A56, 0x98F3, 0xA8D0, 0xC382), + bn_pack4(0x9CCC, 0x041C, 0x7BC3, 0x08D8), + bn_pack4(0xAF87, 0x4E73, 0x03CE, 0x5329), + bn_pack4(0x6160, 0x2790, 0x04E5, 0x7AE6), + bn_pack4(0x032C, 0xFBDB, 0xF52F, 0xB378), + bn_pack4(0x5EA7, 0x7A27, 0x75D2, 0xECFA), + bn_pack4(0x5445, 0x23B5, 0x24B0, 0xD57D), + bn_pack4(0x5B9D, 0x32E6, 0x88F8, 0x7748), + bn_pack4(0xF1D2, 0xB907, 0x8717, 0x461A), + bn_pack4(0x76BD, 0x207A, 0x436C, 0x6481), + bn_pack4(0xCA97, 0xB43A, 0x23FB, 0x8016), + bn_pack4(0x1D28, 0x1E44, 0x6B14, 0x773B), + bn_pack4(0x7359, 0xD041, 0xD5C3, 0x3EA7), + bn_pack4(0xA80D, 0x740A, 0xDBF4, 0xFF74), + bn_pack4(0x55F9, 0x7993, 0xEC97, 0x5EEA), + bn_pack4(0x2918, 0xA996, 0x2F0B, 0x93B8), + bn_pack4(0x661A, 0x05FB, 0xD5FA, 0xAAE8), + bn_pack4(0xCF60, 0x9517, 0x9A16, 0x3AB3), + bn_pack4(0xE808, 0x3969, 0xEDB7, 0x67B0), + bn_pack4(0xCD7F, 0x48A9, 0xDA04, 0xFD50), + bn_pack4(0xD523, 0x12AB, 0x4B03, 0x310D), + bn_pack4(0x8193, 0xE075, 0x7767, 0xA13D), + bn_pack4(0xA373, 0x29CB, 0xB4A0, 0x99ED), + bn_pack4(0xFC31, 0x9294, 0x3DB5, 0x6050), + bn_pack4(0xAF72, 0xB665, 0x1987, 0xEE07), + bn_pack4(0xF166, 0xDE5E, 0x1389, 0x582F), + bn_pack4(0xAC6B, 0xDB41, 0x324A, 0x9A9B) +}; + +const BIGNUM bn_group_2048 = { + (BN_ULONG *)bn_group_2048_value, + OSSL_NELEM(bn_group_2048_value), + OSSL_NELEM(bn_group_2048_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_3072_value[] = { + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), + bn_pack4(0x4B82, 0xD120, 0xA93A, 0xD2CA), + bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E), + bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31), + bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2), + bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C), + bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C), + bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64), + bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864), + bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B), + bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D), + bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7), + bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7), + bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D), + bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A), + bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64), + bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33), + bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D), + bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510), + bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5), + bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718), + bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9), + bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F), + bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603), + bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B), + bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C), + bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804), + bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D), + bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB), + bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96), + bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F), + bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A), + bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05), + bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D), + bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6), + bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5), + bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED), + bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B), + bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6), + bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245), + bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437), + bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B), + bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD), + bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22), + bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74), + bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1), + bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234), + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) +}; + +const BIGNUM bn_group_3072 = { + (BN_ULONG *)bn_group_3072_value, + OSSL_NELEM(bn_group_3072_value), + OSSL_NELEM(bn_group_3072_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_4096_value[] = { + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), + bn_pack4(0x4DF4, 0x35C9, 0x3406, 0x3199), + bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F), + bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1), + bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9), + bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C), + bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF), + bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED), + bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2), + bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D), + bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6), + bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9), + bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8), + bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA), + bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C), + bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26), + bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7), + bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801), + bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E), + bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31), + bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2), + bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C), + bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C), + bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64), + bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864), + bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B), + bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D), + bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7), + bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7), + bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D), + bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A), + bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64), + bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33), + bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D), + bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510), + bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5), + bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718), + bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9), + bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F), + bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603), + bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B), + bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C), + bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804), + bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D), + bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB), + bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96), + bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F), + bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A), + bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05), + bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D), + bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6), + bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5), + bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED), + bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B), + bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6), + bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245), + bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437), + bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B), + bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD), + bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22), + bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74), + bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1), + bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234), + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) +}; + +const BIGNUM bn_group_4096 = { + (BN_ULONG *)bn_group_4096_value, + OSSL_NELEM(bn_group_4096_value), + OSSL_NELEM(bn_group_4096_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_6144_value[] = { + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), + bn_pack4(0xE694, 0xF91E, 0x6DCC, 0x4024), + bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6), + bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE), + bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468), + bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632), + bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C), + bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0), + bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76), + bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328), + bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0), + bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8), + bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA), + bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5), + bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE), + bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3), + bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E), + bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82), + bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6), + bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03), + bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC), + bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF), + bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42), + bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B), + bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B), + bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED), + bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918), + bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831), + bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE), + bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E), + bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD), + bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE), + bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026), + bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492), + bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F), + bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1), + bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9), + bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C), + bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF), + bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED), + bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2), + bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D), + bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6), + bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9), + bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8), + bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA), + bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C), + bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26), + bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7), + bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801), + bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E), + bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31), + bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2), + bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C), + bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C), + bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64), + bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864), + bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B), + bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D), + bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7), + bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7), + bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D), + bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A), + bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64), + bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33), + bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D), + bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510), + bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5), + bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718), + bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9), + bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F), + bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603), + bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B), + bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C), + bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804), + bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D), + bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB), + bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96), + bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F), + bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A), + bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05), + bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D), + bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6), + bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5), + bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED), + bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B), + bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6), + bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245), + bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437), + bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B), + bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD), + bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22), + bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74), + bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1), + bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234), + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) +}; + +const BIGNUM bn_group_6144 = { + (BN_ULONG *)bn_group_6144_value, + OSSL_NELEM(bn_group_6144_value), + OSSL_NELEM(bn_group_6144_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_group_8192_value[] = { + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), + bn_pack4(0x60C9, 0x80DD, 0x98ED, 0xD3DF), + bn_pack4(0xC81F, 0x56E8, 0x80B9, 0x6E71), + bn_pack4(0x9E30, 0x50E2, 0x7656, 0x94DF), + bn_pack4(0x9558, 0xE447, 0x5677, 0xE9AA), + bn_pack4(0xC919, 0x0DA6, 0xFC02, 0x6E47), + bn_pack4(0x889A, 0x002E, 0xD5EE, 0x382B), + bn_pack4(0x4009, 0x438B, 0x481C, 0x6CD7), + bn_pack4(0x3590, 0x46F4, 0xEB87, 0x9F92), + bn_pack4(0xFAF3, 0x6BC3, 0x1ECF, 0xA268), + bn_pack4(0xB1D5, 0x10BD, 0x7EE7, 0x4D73), + bn_pack4(0xF9AB, 0x4819, 0x5DED, 0x7EA1), + bn_pack4(0x64F3, 0x1CC5, 0x0846, 0x851D), + bn_pack4(0x4597, 0xE899, 0xA025, 0x5DC1), + bn_pack4(0xDF31, 0x0EE0, 0x74AB, 0x6A36), + bn_pack4(0x6D2A, 0x13F8, 0x3F44, 0xF82D), + bn_pack4(0x062B, 0x3CF5, 0xB3A2, 0x78A6), + bn_pack4(0x7968, 0x3303, 0xED5B, 0xDD3A), + bn_pack4(0xFA9D, 0x4B7F, 0xA2C0, 0x87E8), + bn_pack4(0x4BCB, 0xC886, 0x2F83, 0x85DD), + bn_pack4(0x3473, 0xFC64, 0x6CEA, 0x306B), + bn_pack4(0x13EB, 0x57A8, 0x1A23, 0xF0C7), + bn_pack4(0x2222, 0x2E04, 0xA403, 0x7C07), + bn_pack4(0xE3FD, 0xB8BE, 0xFC84, 0x8AD9), + bn_pack4(0x238F, 0x16CB, 0xE39D, 0x652D), + bn_pack4(0x3423, 0xB474, 0x2BF1, 0xC978), + bn_pack4(0x3AAB, 0x639C, 0x5AE4, 0xF568), + bn_pack4(0x2576, 0xF693, 0x6BA4, 0x2466), + bn_pack4(0x741F, 0xA7BF, 0x8AFC, 0x47ED), + bn_pack4(0x3BC8, 0x32B6, 0x8D9D, 0xD300), + bn_pack4(0xD8BE, 0xC4D0, 0x73B9, 0x31BA), + bn_pack4(0x3877, 0x7CB6, 0xA932, 0xDF8C), + bn_pack4(0x74A3, 0x926F, 0x12FE, 0xE5E4), + bn_pack4(0xE694, 0xF91E, 0x6DBE, 0x1159), + bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6), + bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE), + bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468), + bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632), + bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C), + bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0), + bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76), + bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328), + bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0), + bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8), + bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA), + bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5), + bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE), + bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3), + bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E), + bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82), + bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6), + bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03), + bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC), + bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF), + bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42), + bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B), + bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B), + bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED), + bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918), + bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831), + bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE), + bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E), + bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD), + bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE), + bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026), + bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492), + bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F), + bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1), + bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9), + bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C), + bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF), + bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED), + bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2), + bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D), + bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6), + bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9), + bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8), + bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA), + bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C), + bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26), + bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7), + bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801), + bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E), + bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31), + bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2), + bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C), + bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C), + bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64), + bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864), + bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B), + bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D), + bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7), + bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7), + bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D), + bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A), + bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64), + bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33), + bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D), + bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510), + bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5), + bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718), + bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9), + bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F), + bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603), + bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B), + bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C), + bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804), + bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D), + bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB), + bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96), + bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F), + bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A), + bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05), + bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D), + bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6), + bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5), + bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED), + bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B), + bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6), + bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245), + bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437), + bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B), + bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD), + bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22), + bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74), + bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1), + bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234), + bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) +}; + +const BIGNUM bn_group_8192 = { + (BN_ULONG *)bn_group_8192_value, + OSSL_NELEM(bn_group_8192_value), + OSSL_NELEM(bn_group_8192_value), + 0, + BN_FLG_STATIC_DATA +}; + +static const BN_ULONG bn_generator_19_value[] = { 19 }; + +const BIGNUM bn_generator_19 = { + (BN_ULONG *)bn_generator_19_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; +static const BN_ULONG bn_generator_5_value[] = { 5 }; + +const BIGNUM bn_generator_5 = { + (BN_ULONG *)bn_generator_5_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; +static const BN_ULONG bn_generator_2_value[] = { 2 }; + +const BIGNUM bn_generator_2 = { + (BN_ULONG *)bn_generator_2_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; + +#endif diff --git a/crypto/bn/bn_word.c b/crypto/bn/bn_word.c index 9b5f9cb98c3a..262d7668fcd4 100644 --- a/crypto/bn/bn_word.c +++ b/crypto/bn/bn_word.c @@ -1,63 +1,13 @@ -/* crypto/bn/bn_word.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* + * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include <stdio.h> -#include "cryptlib.h" +#include "internal/cryptlib.h" #include "bn_lcl.h" BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) @@ -105,7 +55,7 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) (BN_ULLONG) w); #endif } - return ((BN_ULONG)ret); + return (BN_ULONG)ret; } BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) @@ -139,8 +89,10 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) if ((a->top > 0) && (a->d[a->top - 1] == 0)) a->top--; ret >>= j; + if (!a->top) + a->neg = 0; /* don't allow negative zero */ bn_check_top(a); - return (ret); + return ret; } int BN_add_word(BIGNUM *a, BN_ULONG w) @@ -163,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) i = BN_sub_word(a, w); if (!BN_is_zero(a)) a->neg = !(a->neg); - return (i); + return i; } for (i = 0; w != 0 && i < a->top; i++) { a->d[i] = l = (a->d[i] + w) & BN_MASK2; @@ -176,7 +128,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) a->d[i] = w; } bn_check_top(a); - return (1); + return 1; } int BN_sub_word(BIGNUM *a, BN_ULONG w) @@ -201,13 +153,13 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) a->neg = 0; i = BN_add_word(a, w); a->neg = 1; - return (i); + return i; } if ((a->top == 1) && (a->d[0] < w)) { a->d[0] = w - a->d[0]; a->neg = 1; - return (1); + return 1; } i = 0; for (;;) { @@ -223,7 +175,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) if ((a->d[i] == 0) && (i == (a->top - 1))) a->top--; bn_check_top(a); - return (1); + return 1; } int BN_mul_word(BIGNUM *a, BN_ULONG w) @@ -239,11 +191,11 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) ll = bn_mul_words(a->d, a->d, a->top, w); if (ll) { if (bn_wexpand(a, a->top + 1) == NULL) - return (0); + return 0; a->d[a->top++] = ll; } } } bn_check_top(a); - return (1); + return 1; } diff --git a/crypto/bn/bn_x931p.c b/crypto/bn/bn_x931p.c index f444af3feabd..9eb8384fdeb2 100644 --- a/crypto/bn/bn_x931p.c +++ b/crypto/bn/bn_x931p.c @@ -1,64 +1,15 @@ -/* bn_x931p.c */ /* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project - * 2005. - */ -/* ==================================================================== - * Copyright (c) 2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). + * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <stdio.h> #include <openssl/bn.h> +#include "bn_lcl.h" /* X9.31 routines for prime derivation */ @@ -70,7 +21,7 @@ static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx, BN_GENCB *cb) { - int i = 0; + int i = 0, is_prime; if (!BN_copy(pi, Xpi)) return 0; if (!BN_is_odd(pi) && !BN_add_word(pi, 1)) @@ -78,8 +29,11 @@ static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx, for (;;) { i++; BN_GENCB_call(cb, 0, i); - /* NB 27 MR is specificed in X9.31 */ - if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb)) + /* NB 27 MR is specified in X9.31 */ + is_prime = BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb); + if (is_prime < 0) + return 0; + if (is_prime) break; if (!BN_add_word(pi, 2)) return 0; @@ -108,10 +62,10 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, return 0; BN_CTX_start(ctx); - if (!p1) + if (p1 == NULL) p1 = BN_CTX_get(ctx); - if (!p2) + if (p2 == NULL) p2 = BN_CTX_get(ctx); t = BN_CTX_get(ctx); @@ -120,6 +74,9 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, pm1 = BN_CTX_get(ctx); + if (pm1 == NULL) + goto err; + if (!bn_x931_derive_pi(p1, Xp1, ctx, cb)) goto err; @@ -168,14 +125,18 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, goto err; if (!BN_gcd(t, pm1, e, ctx)) goto err; - if (BN_is_one(t) + if (BN_is_one(t)) { /* * X9.31 specifies 8 MR and 1 Lucas test or any prime test * offering similar or better guarantees 50 MR is considerably * better. */ - && BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb)) - break; + int r = BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb); + if (r < 0) + goto err; + if (r) + break; + } if (!BN_add(p, p, p1p2)) goto err; } @@ -192,8 +153,8 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, } /* - * Generate pair of paramters Xp, Xq for X9.31 prime generation. Note: nbits - * paramter is sum of number of bits in both. + * Generate pair of parameters Xp, Xq for X9.31 prime generation. Note: nbits + * parameter is sum of number of bits in both. */ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) @@ -212,7 +173,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) * - 1. By setting the top two bits we ensure that the lower bound is * exceeded. */ - if (!BN_rand(Xp, nbits, 1, 0)) + if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) goto err; BN_CTX_start(ctx); @@ -221,10 +182,12 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) goto err; for (i = 0; i < 1000; i++) { - if (!BN_rand(Xq, nbits, 1, 0)) + if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) goto err; + /* Check that |Xp - Xq| > 2^(nbits - 100) */ - BN_sub(t, Xp, Xq); + if (!BN_sub(t, Xp, Xq)) + goto err; if (BN_num_bits(t) > (nbits - 100)) break; } @@ -264,9 +227,9 @@ int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, if (Xp1 == NULL || Xp2 == NULL) goto error; - if (!BN_rand(Xp1, 101, 0, 0)) + if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto error; - if (!BN_rand(Xp2, 101, 0, 0)) + if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto error; if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb)) goto error; diff --git a/crypto/bn/bnspeed.c b/crypto/bn/bnspeed.c deleted file mode 100644 index e387fdfbc2f0..000000000000 --- a/crypto/bn/bnspeed.c +++ /dev/null @@ -1,232 +0,0 @@ -/* unused */ - -/* crypto/bn/bnspeed.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* most of this code has been pilfered from my libdes speed.c program */ - -#define BASENUM 1000000 -#undef PROG -#define PROG bnspeed_main - -#include <stdio.h> -#include <stdlib.h> -#include <signal.h> -#include <string.h> -#include <openssl/crypto.h> -#include <openssl/err.h> - -#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX) -# define TIMES -#endif - -#ifndef _IRIX -# include <time.h> -#endif -#ifdef TIMES -# include <sys/types.h> -# include <sys/times.h> -#endif - -/* - * Depending on the VMS version, the tms structure is perhaps defined. The - * __TMS macro will show if it was. If it wasn't defined, we should undefine - * TIMES, since that tells the rest of the program how things should be - * handled. -- Richard Levitte - */ -#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS) -# undef TIMES -#endif - -#ifndef TIMES -# include <sys/timeb.h> -#endif - -#if defined(sun) || defined(__ultrix) -# define _POSIX_SOURCE -# include <limits.h> -# include <sys/param.h> -#endif - -#include <openssl/bn.h> -#include <openssl/x509.h> - -/* The following if from times(3) man page. It may need to be changed */ -#ifndef HZ -# ifndef CLK_TCK -# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */ -# define HZ 100.0 -# else /* _BSD_CLK_TCK_ */ -# define HZ ((double)_BSD_CLK_TCK_) -# endif -# else /* CLK_TCK */ -# define HZ ((double)CLK_TCK) -# endif -#endif - -#undef BUFSIZE -#define BUFSIZE ((long)1024*8) -int run = 0; - -static double Time_F(int s); -#define START 0 -#define STOP 1 - -static double Time_F(int s) -{ - double ret; -#ifdef TIMES - static struct tms tstart, tend; - - if (s == START) { - times(&tstart); - return (0); - } else { - times(&tend); - ret = ((double)(tend.tms_utime - tstart.tms_utime)) / HZ; - return ((ret < 1e-3) ? 1e-3 : ret); - } -#else /* !times() */ - static struct timeb tstart, tend; - long i; - - if (s == START) { - ftime(&tstart); - return (0); - } else { - ftime(&tend); - i = (long)tend.millitm - (long)tstart.millitm; - ret = ((double)(tend.time - tstart.time)) + ((double)i) / 1000.0; - return ((ret < 0.001) ? 0.001 : ret); - } -#endif -} - -#define NUM_SIZES 5 -static int sizes[NUM_SIZES] = { 128, 256, 512, 1024, 2048 }; - -/* - * static int sizes[NUM_SIZES]={59,179,299,419,539}; - */ - -void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); - -int main(int argc, char **argv) -{ - BN_CTX *ctx; - BIGNUM a, b, c; - - ctx = BN_CTX_new(); - BN_init(&a); - BN_init(&b); - BN_init(&c); - - do_mul(&a, &b, &c, ctx); -} - -void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) -{ - int i, j, k; - double tm; - long num; - - for (i = 0; i < NUM_SIZES; i++) { - num = BASENUM; - if (i) - num /= (i * 3); - BN_rand(a, sizes[i], 1, 0); - for (j = i; j < NUM_SIZES; j++) { - BN_rand(b, sizes[j], 1, 0); - Time_F(START); - for (k = 0; k < num; k++) - BN_mul(r, b, a, ctx); - tm = Time_F(STOP); - printf("mul %4d x %4d -> %8.3fms\n", sizes[i], sizes[j], - tm * 1000.0 / num); - } - } - - for (i = 0; i < NUM_SIZES; i++) { - num = BASENUM; - if (i) - num /= (i * 3); - BN_rand(a, sizes[i], 1, 0); - Time_F(START); - for (k = 0; k < num; k++) - BN_sqr(r, a, ctx); - tm = Time_F(STOP); - printf("sqr %4d x %4d -> %8.3fms\n", sizes[i], sizes[i], - tm * 1000.0 / num); - } - - for (i = 0; i < NUM_SIZES; i++) { - num = BASENUM / 10; - if (i) - num /= (i * 3); - BN_rand(a, sizes[i] - 1, 1, 0); - for (j = i; j < NUM_SIZES; j++) { - BN_rand(b, sizes[j], 1, 0); - Time_F(START); - for (k = 0; k < 100000; k++) - BN_div(r, NULL, b, a, ctx); - tm = Time_F(STOP); - printf("div %4d / %4d -> %8.3fms\n", sizes[j], sizes[i] - 1, - tm * 1000.0 / num); - } - } -} diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c deleted file mode 100644 index abe5dbe0b01a..000000000000 --- a/crypto/bn/bntest.c +++ /dev/null @@ -1,2160 +0,0 @@ -/* crypto/bn/bntest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * Portions of the attached software ("Contribution") are developed by - * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. - * - * The Contribution is licensed pursuant to the Eric Young open source - * license provided above. - * - * The binary polynomial arithmetic software is originally written by - * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. - * - */ - -/* - * Until the key-gen callbacks are modified to use newer prototypes, we allow - * deprecated functions for openssl-internal code - */ -#ifdef OPENSSL_NO_DEPRECATED -# undef OPENSSL_NO_DEPRECATED -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "e_os.h" - -#include <openssl/bio.h> -#include <openssl/bn.h> -#include <openssl/rand.h> -#include <openssl/x509.h> -#include <openssl/err.h> - -const int num0 = 100; /* number of tests */ -const int num1 = 50; /* additional tests for some functions */ -const int num2 = 5; /* number of tests for slow functions */ - -int test_add(BIO *bp); -int test_sub(BIO *bp); -int test_lshift1(BIO *bp); -int test_lshift(BIO *bp, BN_CTX *ctx, BIGNUM *a_); -int test_rshift1(BIO *bp); -int test_rshift(BIO *bp, BN_CTX *ctx); -int test_div(BIO *bp, BN_CTX *ctx); -int test_div_word(BIO *bp); -int test_div_recp(BIO *bp, BN_CTX *ctx); -int test_mul(BIO *bp); -int test_sqr(BIO *bp, BN_CTX *ctx); -int test_mont(BIO *bp, BN_CTX *ctx); -int test_mod(BIO *bp, BN_CTX *ctx); -int test_mod_mul(BIO *bp, BN_CTX *ctx); -int test_mod_exp(BIO *bp, BN_CTX *ctx); -int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx); -int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx); -int test_exp(BIO *bp, BN_CTX *ctx); -int test_gf2m_add(BIO *bp); -int test_gf2m_mod(BIO *bp); -int test_gf2m_mod_mul(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_sqr(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_inv(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_div(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_exp(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_sqrt(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_solve_quad(BIO *bp, BN_CTX *ctx); -int test_kron(BIO *bp, BN_CTX *ctx); -int test_sqrt(BIO *bp, BN_CTX *ctx); -int rand_neg(void); -static int results = 0; - -static unsigned char lst[] = - "\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9" - "\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0"; - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -static void message(BIO *out, char *m) -{ - fprintf(stderr, "test %s\n", m); - BIO_puts(out, "print \"test "); - BIO_puts(out, m); - BIO_puts(out, "\\n\"\n"); -} - -int main(int argc, char *argv[]) -{ - BN_CTX *ctx; - BIO *out; - char *outfile = NULL; - - results = 0; - - RAND_seed(rnd_seed, sizeof(rnd_seed)); /* or BN_generate_prime may fail */ - - argc--; - argv++; - while (argc >= 1) { - if (strcmp(*argv, "-results") == 0) - results = 1; - else if (strcmp(*argv, "-out") == 0) { - if (--argc < 1) - break; - outfile = *(++argv); - } - argc--; - argv++; - } - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - - out = BIO_new(BIO_s_file()); - if (out == NULL) - EXIT(1); - if (outfile == NULL) { - BIO_set_fp(out, stdout, BIO_NOCLOSE); - } else { - if (!BIO_write_filename(out, outfile)) { - perror(outfile); - EXIT(1); - } - } - - if (!results) - BIO_puts(out, "obase=16\nibase=16\n"); - - message(out, "BN_add"); - if (!test_add(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_sub"); - if (!test_sub(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift1"); - if (!test_lshift1(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift (fixed)"); - if (!test_lshift(out, ctx, BN_bin2bn(lst, sizeof(lst) - 1, NULL))) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift"); - if (!test_lshift(out, ctx, NULL)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_rshift1"); - if (!test_rshift1(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_rshift"); - if (!test_rshift(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_sqr"); - if (!test_sqr(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mul"); - if (!test_mul(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div"); - if (!test_div(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div_word"); - if (!test_div_word(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div_recp"); - if (!test_div_recp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod"); - if (!test_mod(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_mul"); - if (!test_mod_mul(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mont"); - if (!test_mont(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_exp"); - if (!test_mod_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_exp_mont_consttime"); - if (!test_mod_exp_mont_consttime(out, ctx)) - goto err; - if (!test_mod_exp_mont5(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_exp"); - if (!test_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_kronecker"); - if (!test_kron(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_sqrt"); - if (!test_sqrt(out, ctx)) - goto err; - (void)BIO_flush(out); -#ifndef OPENSSL_NO_EC2M - message(out, "BN_GF2m_add"); - if (!test_gf2m_add(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod"); - if (!test_gf2m_mod(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_mul"); - if (!test_gf2m_mod_mul(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_sqr"); - if (!test_gf2m_mod_sqr(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_inv"); - if (!test_gf2m_mod_inv(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_div"); - if (!test_gf2m_mod_div(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_exp"); - if (!test_gf2m_mod_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_sqrt"); - if (!test_gf2m_mod_sqrt(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_solve_quad"); - if (!test_gf2m_mod_solve_quad(out, ctx)) - goto err; - (void)BIO_flush(out); -#endif - BN_CTX_free(ctx); - BIO_free(out); - - EXIT(0); - err: - BIO_puts(out, "1\n"); /* make sure the Perl script fed by bc - * notices the failure, see test_bn in - * test/Makefile.ssl */ - (void)BIO_flush(out); - ERR_load_crypto_strings(); - ERR_print_errors_fp(stderr); - EXIT(1); - return (1); -} - -int test_add(BIO *bp) -{ - BIGNUM a, b, c; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - BN_bntest_rand(&a, 512, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(&b, 450 + i, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_add(&c, &a, &b); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " + "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - a.neg = !a.neg; - b.neg = !b.neg; - BN_add(&c, &c, &b); - BN_add(&c, &c, &a); - if (!BN_is_zero(&c)) { - fprintf(stderr, "Add test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - return (1); -} - -int test_sub(BIO *bp) -{ - BIGNUM a, b, c; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 512, 0, 0); - BN_copy(&b, &a); - if (BN_set_bit(&a, i) == 0) - return (0); - BN_add_word(&b, i); - } else { - BN_bntest_rand(&b, 400 + i - num1, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - } - BN_sub(&c, &a, &b); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " - "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_add(&c, &c, &b); - BN_sub(&c, &c, &a); - if (!BN_is_zero(&c)) { - fprintf(stderr, "Subtract test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - return (1); -} - -int test_div(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, e; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - BN_one(&a); - BN_zero(&b); - - if (BN_div(&d, &c, &a, &b, ctx)) { - fprintf(stderr, "Division by zero succeeded!\n"); - return 0; - } - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 400, 0, 0); - BN_copy(&b, &a); - BN_lshift(&a, &a, i); - BN_add_word(&a, i); - } else - BN_bntest_rand(&b, 50 + 3 * (i - num1), 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_div(&d, &c, &a, &b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &d); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_mul(&e, &d, &b, ctx); - BN_add(&d, &e, &c); - BN_sub(&d, &d, &a); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Division test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - return (1); -} - -static void print_word(BIO *bp, BN_ULONG w) -{ -#ifdef SIXTY_FOUR_BIT - if (sizeof(w) > sizeof(unsigned long)) { - unsigned long h = (unsigned long)(w >> 32), l = (unsigned long)(w); - - if (h) - BIO_printf(bp, "%lX%08lX", h, l); - else - BIO_printf(bp, "%lX", l); - return; - } -#endif - BIO_printf(bp, BN_HEX_FMT1, w); -} - -int test_div_word(BIO *bp) -{ - BIGNUM a, b; - BN_ULONG r, rmod, s; - int i; - - BN_init(&a); - BN_init(&b); - - for (i = 0; i < num0; i++) { - do { - BN_bntest_rand(&a, 512, -1, 0); - BN_bntest_rand(&b, BN_BITS2, -1, 0); - } while (BN_is_zero(&b)); - - s = b.d[0]; - BN_copy(&b, &a); - rmod = BN_mod_word(&b, s); - r = BN_div_word(&b, s); - - if (rmod != r) { - fprintf(stderr, "Mod (word) test failed!\n"); - return 0; - } - - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - print_word(bp, s); - BIO_puts(bp, " - "); - } - BN_print(bp, &b); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - print_word(bp, s); - BIO_puts(bp, " - "); - } - print_word(bp, r); - BIO_puts(bp, "\n"); - } - BN_mul_word(&b, s); - BN_add_word(&b, r); - BN_sub(&b, &a, &b); - if (!BN_is_zero(&b)) { - fprintf(stderr, "Division (word) test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - return (1); -} - -int test_div_recp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, e; - BN_RECP_CTX recp; - int i; - - BN_RECP_CTX_init(&recp); - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 400, 0, 0); - BN_copy(&b, &a); - BN_lshift(&a, &a, i); - BN_add_word(&a, i); - } else - BN_bntest_rand(&b, 50 + 3 * (i - num1), 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_RECP_CTX_set(&recp, &b, ctx); - BN_div_recp(&d, &c, &a, &recp, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &d); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_mul(&e, &d, &b, ctx); - BN_add(&d, &e, &c); - BN_sub(&d, &d, &a); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Reciprocal division test failed!\n"); - fprintf(stderr, "a="); - BN_print_fp(stderr, &a); - fprintf(stderr, "\nb="); - BN_print_fp(stderr, &b); - fprintf(stderr, "\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - BN_RECP_CTX_free(&recp); - return (1); -} - -int test_mul(BIO *bp) -{ - BIGNUM a, b, c, d, e; - int i; - BN_CTX *ctx; - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - for (i = 0; i < num0 + num1; i++) { - if (i <= num1) { - BN_bntest_rand(&a, 100, 0, 0); - BN_bntest_rand(&b, 100, 0, 0); - } else - BN_bntest_rand(&b, i - num1, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_mul(&c, &a, &b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " * "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_div(&d, &e, &c, &a, ctx); - BN_sub(&d, &d, &b); - if (!BN_is_zero(&d) || !BN_is_zero(&e)) { - fprintf(stderr, "Multiplication test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - BN_CTX_free(ctx); - return (1); -} - -int test_sqr(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *c, *d, *e; - int i, ret = 0; - - a = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - if (a == NULL || c == NULL || d == NULL || e == NULL) { - goto err; - } - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 40 + i * 10, 0, 0); - a->neg = rand_neg(); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_div(d, e, c, a, ctx); - BN_sub(d, d, a); - if (!BN_is_zero(d) || !BN_is_zero(e)) { - fprintf(stderr, "Square test failed!\n"); - goto err; - } - } - - /* Regression test for a BN_sqr overflow bug. */ - BN_hex2bn(&a, - "80000000000000008000000000000001" - "FFFFFFFFFFFFFFFE0000000000000000"); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, a, ctx); - if (BN_cmp(c, d)) { - fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " - "different results!\n"); - goto err; - } - - /* Regression test for a BN_sqr overflow bug. */ - BN_hex2bn(&a, - "80000000000000000000000080000001" - "FFFFFFFE000000000000000000000000"); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, a, ctx); - if (BN_cmp(c, d)) { - fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " - "different results!\n"); - goto err; - } - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (c != NULL) - BN_free(c); - if (d != NULL) - BN_free(d); - if (e != NULL) - BN_free(e); - return ret; -} - -int test_mont(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, A, B; - BIGNUM n; - int i; - BN_MONT_CTX *mont; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&A); - BN_init(&B); - BN_init(&n); - - mont = BN_MONT_CTX_new(); - if (mont == NULL) - return 0; - - BN_zero(&n); - if (BN_MONT_CTX_set(mont, &n, ctx)) { - fprintf(stderr, "BN_MONT_CTX_set succeeded for zero modulus!\n"); - return 0; - } - - BN_set_word(&n, 16); - if (BN_MONT_CTX_set(mont, &n, ctx)) { - fprintf(stderr, "BN_MONT_CTX_set succeeded for even modulus!\n"); - return 0; - } - - BN_bntest_rand(&a, 100, 0, 0); - BN_bntest_rand(&b, 100, 0, 0); - for (i = 0; i < num2; i++) { - int bits = (200 * (i + 1)) / num2; - - if (bits == 0) - continue; - BN_bntest_rand(&n, bits, 0, 1); - BN_MONT_CTX_set(mont, &n, ctx); - - BN_nnmod(&a, &a, &n, ctx); - BN_nnmod(&b, &b, &n, ctx); - - BN_to_montgomery(&A, &a, mont, ctx); - BN_to_montgomery(&B, &b, mont, ctx); - - BN_mod_mul_montgomery(&c, &A, &B, mont, ctx); - BN_from_montgomery(&A, &c, mont, ctx); - if (bp != NULL) { - if (!results) { -#ifdef undef - fprintf(stderr, "%d * %d %% %d\n", - BN_num_bits(&a), - BN_num_bits(&b), BN_num_bits(mont->N)); -#endif - BN_print(bp, &a); - BIO_puts(bp, " * "); - BN_print(bp, &b); - BIO_puts(bp, " % "); - BN_print(bp, &(mont->N)); - BIO_puts(bp, " - "); - } - BN_print(bp, &A); - BIO_puts(bp, "\n"); - } - BN_mod_mul(&d, &a, &b, &n, ctx); - BN_sub(&d, &d, &A); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Montgomery multiplication test failed!\n"); - return 0; - } - } - BN_MONT_CTX_free(mont); - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&A); - BN_free(&B); - BN_free(&n); - return (1); -} - -int test_mod(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_bntest_rand(a, 1024, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(b, 450 + i * 10, 0, 0); - a->neg = rand_neg(); - b->neg = rand_neg(); - BN_mod(c, a, b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_div(d, e, a, b, ctx); - BN_sub(e, e, c); - if (!BN_is_zero(e)) { - fprintf(stderr, "Modulo test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_mul(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i, j; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_mul(e, a, b, c, ctx)) { - fprintf(stderr, "BN_mod_mul with zero modulus succeeded!\n"); - return 0; - } - - for (j = 0; j < 3; j++) { - BN_bntest_rand(c, 1024, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 475 + i * 10, 0, 0); - BN_bntest_rand(b, 425 + i * 11, 0, 0); - a->neg = rand_neg(); - b->neg = rand_neg(); - if (!BN_mod_mul(e, a, b, c, ctx)) { - unsigned long l; - - while ((l = ERR_get_error())) - fprintf(stderr, "ERROR:%s\n", ERR_error_string(l, NULL)); - EXIT(1); - } - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - if ((a->neg ^ b->neg) && !BN_is_zero(e)) { - /* - * If (a*b) % c is negative, c must be added in order - * to obtain the normalized remainder (new with - * OpenSSL 0.9.7, previous versions of BN_mod_mul - * could generate negative results) - */ - BIO_puts(bp, " + "); - BN_print(bp, c); - } - BIO_puts(bp, " - "); - } - BN_print(bp, e); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, b, ctx); - BN_sub(d, d, e); - BN_div(a, b, d, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo multiply test failed!\n"); - ERR_print_errors_fp(stderr); - return 0; - } - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_exp(d, a, b, c, ctx)) { - fprintf(stderr, "BN_mod_exp with zero modulus succeeded!\n"); - return 0; - } - - BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (!BN_mod_exp(d, a, b, c, ctx)) - return (0); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_exp(e, a, b, ctx); - BN_sub(e, e, d); - BN_div(a, b, e, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo exponentiation test failed!\n"); - return 0; - } - } - - /* Regression test for carry propagation bug in sqr8x_reduction */ - BN_hex2bn(&a, "050505050505"); - BN_hex2bn(&b, "02"); - BN_hex2bn(&c, - "4141414141414141414141274141414141414141414141414141414141414141" - "4141414141414141414141414141414141414141414141414141414141414141" - "4141414141414141414141800000000000000000000000000000000000000000" - "0000000000000000000000000000000000000000000000000000000000000000" - "0000000000000000000000000000000000000000000000000000000000000000" - "0000000000000000000000000000000000000000000000000000000001"); - BN_mod_exp(d, a, b, c, ctx); - BN_mul(e, a, a, ctx); - if (BN_cmp(d, e)) { - fprintf(stderr, "BN_mod_exp and BN_mul produce different results!\n"); - return 0; - } - - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { - fprintf(stderr, "BN_mod_exp_mont_consttime with zero modulus " - "succeeded\n"); - return 0; - } - - BN_set_word(c, 16); - if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { - fprintf(stderr, "BN_mod_exp_mont_consttime with even modulus " - "succeeded\n"); - return 0; - } - - BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (!BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) - return (00); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_exp(e, a, b, ctx); - BN_sub(e, e, d); - BN_div(a, b, e, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo exponentiation test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -/* - * Test constant-time modular exponentiation with 1024-bit inputs, which on - * x86_64 cause a different code branch to be taken. - */ -int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *p, *m, *d, *e; - BN_MONT_CTX *mont; - - a = BN_new(); - p = BN_new(); - m = BN_new(); - d = BN_new(); - e = BN_new(); - mont = BN_MONT_CTX_new(); - - BN_bntest_rand(m, 1024, 0, 1); /* must be odd for montgomery */ - /* Zero exponent */ - BN_bntest_rand(a, 1024, 0, 0); - BN_zero(p); - if (!BN_mod_exp_mont_consttime(d, a, p, m, ctx, NULL)) - return 0; - if (!BN_is_one(d)) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* Zero input */ - BN_bntest_rand(p, 1024, 0, 0); - BN_zero(a); - if (!BN_mod_exp_mont_consttime(d, a, p, m, ctx, NULL)) - return 0; - if (!BN_is_zero(d)) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* - * Craft an input whose Montgomery representation is 1, i.e., shorter - * than the modulus m, in order to test the const time precomputation - * scattering/gathering. - */ - BN_one(a); - BN_MONT_CTX_set(mont, m, ctx); - if (!BN_from_montgomery(e, a, mont, ctx)) - return 0; - if (!BN_mod_exp_mont_consttime(d, e, p, m, ctx, NULL)) - return 0; - if (!BN_mod_exp_simple(a, e, p, m, ctx)) - return 0; - if (BN_cmp(a, d) != 0) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* Finally, some regular test vectors. */ - BN_bntest_rand(e, 1024, 0, 0); - if (!BN_mod_exp_mont_consttime(d, e, p, m, ctx, NULL)) - return 0; - if (!BN_mod_exp_simple(a, e, p, m, ctx)) - return 0; - if (BN_cmp(a, d) != 0) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - BN_MONT_CTX_free(mont); - BN_free(a); - BN_free(p); - BN_free(m); - BN_free(d); - BN_free(e); - return (1); -} - -int test_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *d, *e, *one; - int i; - - a = BN_new(); - b = BN_new(); - d = BN_new(); - e = BN_new(); - one = BN_new(); - BN_one(one); - - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (BN_exp(d, a, b, ctx) <= 0) - return (0); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_one(e); - for (; !BN_is_zero(b); BN_sub(b, b, one)) - BN_mul(e, e, a, ctx); - BN_sub(e, e, d); - if (!BN_is_zero(e)) { - fprintf(stderr, "Exponentiation test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(d); - BN_free(e); - BN_free(one); - return (1); -} - -#ifndef OPENSSL_NO_EC2M -int test_gf2m_add(BIO *bp) -{ - BIGNUM a, b, c; - int i, ret = 0; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - for (i = 0; i < num0; i++) { - BN_rand(&a, 512, 0, 0); - BN_copy(&b, BN_value_one()); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_GF2m_add(&c, &a, &b); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " ^ "); - BN_print(bp, &b); - BIO_puts(bp, " = "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } -# endif - /* Test that two added values have the correct parity. */ - if ((BN_is_odd(&a) && BN_is_odd(&c)) - || (!BN_is_odd(&a) && !BN_is_odd(&c))) { - fprintf(stderr, "GF(2^m) addition test (a) failed!\n"); - goto err; - } - BN_GF2m_add(&c, &c, &c); - /* Test that c + c = 0. */ - if (!BN_is_zero(&c)) { - fprintf(stderr, "GF(2^m) addition test (b) failed!\n"); - goto err; - } - } - ret = 1; - err: - BN_free(&a); - BN_free(&b); - BN_free(&c); - return ret; -} - -int test_gf2m_mod(BIO *bp) -{ - BIGNUM *a, *b[2], *c, *d, *e; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod(c, a, b[j]); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, " - "); - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(d, a, c); - BN_GF2m_mod(e, d, b[j]); - /* Test that a + (a mod p) mod p == 0. */ - if (!BN_is_zero(e)) { - fprintf(stderr, "GF(2^m) modulo test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - return ret; -} - -int test_gf2m_mod_mul(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f, *g, *h; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - g = BN_new(); - h = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - BN_bntest_rand(c, 1024, 0, 0); - BN_bntest_rand(d, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_mul(e, a, c, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, " - "); - BN_print(bp, e); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, a, d); - BN_GF2m_mod_mul(g, f, c, b[j], ctx); - BN_GF2m_mod_mul(h, d, c, b[j], ctx); - BN_GF2m_add(f, e, g); - BN_GF2m_add(f, f, h); - /* Test that (a+d)*c = a*c + d*c. */ - if (!BN_is_zero(f)) { - fprintf(stderr, - "GF(2^m) modular multiplication test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - BN_free(g); - BN_free(h); - return ret; -} - -int test_gf2m_mod_sqr(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_sqr(c, a, b[j], ctx); - BN_copy(d, a); - BN_GF2m_mod_mul(d, a, d, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ 2 % "); - BN_print(bp, b[j]); - BIO_puts(bp, " = "); - BN_print(bp, c); - BIO_puts(bp, "; a * a = "); - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(d, c, d); - /* Test that a*a = a^2. */ - if (!BN_is_zero(d)) { - fprintf(stderr, "GF(2^m) modular squaring test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - return ret; -} - -int test_gf2m_mod_inv(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_inv(c, a, b[j], ctx); - BN_GF2m_mod_mul(d, a, c, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " - 1 % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - /* Test that ((1/a)*a) = 1. */ - if (!BN_is_one(d)) { - fprintf(stderr, "GF(2^m) modular inversion test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - return ret; -} - -int test_gf2m_mod_div(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - BN_bntest_rand(c, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_div(d, a, c, b[j], ctx); - BN_GF2m_mod_mul(e, d, c, b[j], ctx); - BN_GF2m_mod_div(f, a, e, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " = "); - BN_print(bp, c); - BIO_puts(bp, " * "); - BN_print(bp, d); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - /* Test that ((a/c)*c)/a = 1. */ - if (!BN_is_one(f)) { - fprintf(stderr, "GF(2^m) modular division test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - BN_bntest_rand(c, 512, 0, 0); - BN_bntest_rand(d, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_exp(e, a, c, b[j], ctx); - BN_GF2m_mod_exp(f, a, d, b[j], ctx); - BN_GF2m_mod_mul(e, e, f, b[j], ctx); - BN_add(f, c, d); - BN_GF2m_mod_exp(f, a, f, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ ("); - BN_print(bp, c); - BIO_puts(bp, " + "); - BN_print(bp, d); - BIO_puts(bp, ") = "); - BN_print(bp, e); - BIO_puts(bp, "; - "); - BN_print(bp, f); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, e, f); - /* Test that a^(c+d)=a^c*a^d. */ - if (!BN_is_zero(f)) { - fprintf(stderr, - "GF(2^m) modular exponentiation test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_sqrt(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod(c, a, b[j]); - BN_GF2m_mod_sqrt(d, a, b[j], ctx); - BN_GF2m_mod_sqr(e, d, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, d); - BIO_puts(bp, " ^ 2 - "); - BN_print(bp, a); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, c, e); - /* Test that d^2 = a, where d = sqrt(a). */ - if (!BN_is_zero(f)) { - fprintf(stderr, "GF(2^m) modular square root test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_solve_quad(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e; - int i, j, s = 0, t, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - t = BN_GF2m_mod_solve_quad(c, a, b[j], ctx); - if (t) { - s++; - BN_GF2m_mod_sqr(d, c, b[j], ctx); - BN_GF2m_add(d, c, d); - BN_GF2m_mod(e, a, b[j]); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, c); - BIO_puts(bp, " is root of z^2 + z = "); - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(e, e, d); - /* - * Test that solution of quadratic c satisfies c^2 + c = a. - */ - if (!BN_is_zero(e)) { - fprintf(stderr, - "GF(2^m) modular solve quadratic test failed!\n"); - goto err; - } - - } else { -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BIO_puts(bp, "There are no roots of z^2 + z = "); - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - } - } - } - if (s == 0) { - fprintf(stderr, - "All %i tests of GF(2^m) modular solve quadratic resulted in no roots;\n", - num0); - fprintf(stderr, - "this is very unlikely and probably indicates an error.\n"); - goto err; - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - return ret; -} -#endif -static int genprime_cb(int p, int n, BN_GENCB *arg) -{ - char c = '*'; - - if (p == 0) - c = '.'; - if (p == 1) - c = '+'; - if (p == 2) - c = '*'; - if (p == 3) - c = '\n'; - putc(c, stderr); - fflush(stderr); - return 1; -} - -int test_kron(BIO *bp, BN_CTX *ctx) -{ - BN_GENCB cb; - BIGNUM *a, *b, *r, *t; - int i; - int legendre, kronecker; - int ret = 0; - - a = BN_new(); - b = BN_new(); - r = BN_new(); - t = BN_new(); - if (a == NULL || b == NULL || r == NULL || t == NULL) - goto err; - - BN_GENCB_set(&cb, genprime_cb, NULL); - - /* - * We test BN_kronecker(a, b, ctx) just for b odd (Jacobi symbol). In - * this case we know that if b is prime, then BN_kronecker(a, b, ctx) is - * congruent to $a^{(b-1)/2}$, modulo $b$ (Legendre symbol). So we - * generate a random prime b and compare these values for a number of - * random a's. (That is, we run the Solovay-Strassen primality test to - * confirm that b is prime, except that we don't want to test whether b - * is prime but whether BN_kronecker works.) - */ - - if (!BN_generate_prime_ex(b, 512, 0, NULL, NULL, &cb)) - goto err; - b->neg = rand_neg(); - putc('\n', stderr); - - for (i = 0; i < num0; i++) { - if (!BN_bntest_rand(a, 512, 0, 0)) - goto err; - a->neg = rand_neg(); - - /* t := (|b|-1)/2 (note that b is odd) */ - if (!BN_copy(t, b)) - goto err; - t->neg = 0; - if (!BN_sub_word(t, 1)) - goto err; - if (!BN_rshift1(t, t)) - goto err; - /* r := a^t mod b */ - b->neg = 0; - - if (!BN_mod_exp_recp(r, a, t, b, ctx)) - goto err; - b->neg = 1; - - if (BN_is_word(r, 1)) - legendre = 1; - else if (BN_is_zero(r)) - legendre = 0; - else { - if (!BN_add_word(r, 1)) - goto err; - if (0 != BN_ucmp(r, b)) { - fprintf(stderr, "Legendre symbol computation failed\n"); - goto err; - } - legendre = -1; - } - - kronecker = BN_kronecker(a, b, ctx); - if (kronecker < -1) - goto err; - /* we actually need BN_kronecker(a, |b|) */ - if (a->neg && b->neg) - kronecker = -kronecker; - - if (legendre != kronecker) { - fprintf(stderr, "legendre != kronecker; a = "); - BN_print_fp(stderr, a); - fprintf(stderr, ", b = "); - BN_print_fp(stderr, b); - fprintf(stderr, "\n"); - goto err; - } - - putc('.', stderr); - fflush(stderr); - } - - putc('\n', stderr); - fflush(stderr); - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (b != NULL) - BN_free(b); - if (r != NULL) - BN_free(r); - if (t != NULL) - BN_free(t); - return ret; -} - -int test_sqrt(BIO *bp, BN_CTX *ctx) -{ - BN_GENCB cb; - BIGNUM *a, *p, *r; - int i, j; - int ret = 0; - - a = BN_new(); - p = BN_new(); - r = BN_new(); - if (a == NULL || p == NULL || r == NULL) - goto err; - - BN_GENCB_set(&cb, genprime_cb, NULL); - - for (i = 0; i < 16; i++) { - if (i < 8) { - unsigned primes[8] = { 2, 3, 5, 7, 11, 13, 17, 19 }; - - if (!BN_set_word(p, primes[i])) - goto err; - } else { - if (!BN_set_word(a, 32)) - goto err; - if (!BN_set_word(r, 2 * i + 1)) - goto err; - - if (!BN_generate_prime_ex(p, 256, 0, a, r, &cb)) - goto err; - putc('\n', stderr); - } - p->neg = rand_neg(); - - for (j = 0; j < num2; j++) { - /* - * construct 'a' such that it is a square modulo p, but in - * general not a proper square and not reduced modulo p - */ - if (!BN_bntest_rand(r, 256, 0, 3)) - goto err; - if (!BN_nnmod(r, r, p, ctx)) - goto err; - if (!BN_mod_sqr(r, r, p, ctx)) - goto err; - if (!BN_bntest_rand(a, 256, 0, 3)) - goto err; - if (!BN_nnmod(a, a, p, ctx)) - goto err; - if (!BN_mod_sqr(a, a, p, ctx)) - goto err; - if (!BN_mul(a, a, r, ctx)) - goto err; - if (rand_neg()) - if (!BN_sub(a, a, p)) - goto err; - - if (!BN_mod_sqrt(r, a, p, ctx)) - goto err; - if (!BN_mod_sqr(r, r, p, ctx)) - goto err; - - if (!BN_nnmod(a, a, p, ctx)) - goto err; - - if (BN_cmp(a, r) != 0) { - fprintf(stderr, "BN_mod_sqrt failed: a = "); - BN_print_fp(stderr, a); - fprintf(stderr, ", r = "); - BN_print_fp(stderr, r); - fprintf(stderr, ", p = "); - BN_print_fp(stderr, p); - fprintf(stderr, "\n"); - goto err; - } - - putc('.', stderr); - fflush(stderr); - } - - putc('\n', stderr); - fflush(stderr); - } - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (p != NULL) - BN_free(p); - if (r != NULL) - BN_free(r); - return ret; -} - -int test_lshift(BIO *bp, BN_CTX *ctx, BIGNUM *a_) -{ - BIGNUM *a, *b, *c, *d; - int i; - - b = BN_new(); - c = BN_new(); - d = BN_new(); - BN_one(c); - - if (a_) - a = a_; - else { - a = BN_new(); - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - } - for (i = 0; i < num0; i++) { - BN_lshift(b, a, i + 1); - BN_add(c, c, c); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, c, ctx); - BN_sub(d, d, b); - if (!BN_is_zero(d)) { - fprintf(stderr, "Left shift test failed!\n"); - fprintf(stderr, "a="); - BN_print_fp(stderr, a); - fprintf(stderr, "\nb="); - BN_print_fp(stderr, b); - fprintf(stderr, "\nc="); - BN_print_fp(stderr, c); - fprintf(stderr, "\nd="); - BN_print_fp(stderr, d); - fprintf(stderr, "\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - return (1); -} - -int test_lshift1(BIO *bp) -{ - BIGNUM *a, *b, *c; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_lshift1(b, a); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * 2"); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_add(c, a, a); - BN_sub(a, b, c); - if (!BN_is_zero(a)) { - fprintf(stderr, "Left shift one test failed!\n"); - return 0; - } - - BN_copy(a, b); - } - BN_free(a); - BN_free(b); - BN_free(c); - return (1); -} - -int test_rshift(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - BN_one(c); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_rshift(b, a, i + 1); - BN_add(c, c, c); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " / "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_div(d, e, a, c, ctx); - BN_sub(d, d, b); - if (!BN_is_zero(d)) { - fprintf(stderr, "Right shift test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_rshift1(BIO *bp) -{ - BIGNUM *a, *b, *c; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_rshift1(b, a); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " / 2"); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_sub(c, a, b); - BN_sub(c, c, b); - if (!BN_is_zero(c) && !BN_abs_is_word(c, 1)) { - fprintf(stderr, "Right shift one test failed!\n"); - return 0; - } - BN_copy(a, b); - } - BN_free(a); - BN_free(b); - BN_free(c); - return (1); -} - -int rand_neg(void) -{ - static unsigned int neg = 0; - static int sign[8] = { 0, 0, 0, 1, 1, 0, 1, 1 }; - - return (sign[(neg++) % 8]); -} diff --git a/crypto/bn/build.info b/crypto/bn/build.info new file mode 100644 index 000000000000..a463eddabbbb --- /dev/null +++ b/crypto/bn/build.info @@ -0,0 +1,67 @@ +LIBS=../../libcrypto +SOURCE[../../libcrypto]=\ + bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \ + bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ + bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \ + {- $target{bn_asm_src} -} \ + bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ + bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c +INCLUDE[../../libcrypto]=../../crypto/include + +INCLUDE[bn_exp.o]=.. + +GENERATE[bn-586.s]=asm/bn-586.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) +DEPEND[bn-586.s]=../perlasm/x86asm.pl +GENERATE[co-586.s]=asm/co-586.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) +DEPEND[co-586.s]=../perlasm/x86asm.pl +GENERATE[x86-mont.s]=asm/x86-mont.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) +DEPEND[x86-mont.s]=../perlasm/x86asm.pl +GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) +DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl + +GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME) +INCLUDE[sparcv9a-mont.o]=.. +GENERATE[sparcv9-mont.S]=asm/sparcv9-mont.pl $(PERLASM_SCHEME) +INCLUDE[sparcv9-mont.o]=.. +GENERATE[vis3-mont.S]=asm/vis3-mont.pl $(PERLASM_SCHEME) +INCLUDE[vis3-mont.o]=.. +GENERATE[sparct4-mont.S]=asm/sparct4-mont.pl $(PERLASM_SCHEME) +INCLUDE[sparct4-mont.o]=.. +GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME) +INCLUDE[sparcv9-gf2m.o]=.. + +GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME) +INCLUDE[bn-mips.o]=.. +GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME) +INCLUDE[mips-mont.o]=.. + +GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME) +GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME) + +GENERATE[x86_64-mont.s]=asm/x86_64-mont.pl $(PERLASM_SCHEME) +GENERATE[x86_64-mont5.s]=asm/x86_64-mont5.pl $(PERLASM_SCHEME) +GENERATE[x86_64-gf2m.s]=asm/x86_64-gf2m.pl $(PERLASM_SCHEME) +GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME) +GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME) + +GENERATE[bn-ia64.s]=asm/ia64.S +GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS) + +GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME) + +# ppc - AIX, Linux, MacOS X... +GENERATE[bn-ppc.s]=asm/ppc.pl $(PERLASM_SCHEME) +GENERATE[ppc-mont.s]=asm/ppc-mont.pl $(PERLASM_SCHEME) +GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl $(PERLASM_SCHEME) + +GENERATE[alpha-mont.S]=asm/alpha-mont.pl $(PERLASM_SCHEME) + +GENERATE[armv4-mont.S]=asm/armv4-mont.pl $(PERLASM_SCHEME) +INCLUDE[armv4-mont.o]=.. +GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME) +INCLUDE[armv4-gf2m.o]=.. +GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME) diff --git a/crypto/bn/divtest.c b/crypto/bn/divtest.c deleted file mode 100644 index 2590b4581b18..000000000000 --- a/crypto/bn/divtest.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <openssl/bn.h> -#include <openssl/rand.h> - -static int Rand(n) -{ - unsigned char x[2]; - RAND_pseudo_bytes(x, 2); - return (x[0] + 2 * x[1]); -} - -static void bug(char *m, BIGNUM *a, BIGNUM *b) -{ - printf("%s!\na=", m); - BN_print_fp(stdout, a); - printf("\nb="); - BN_print_fp(stdout, b); - printf("\n"); - fflush(stdout); -} - -main() -{ - BIGNUM *a = BN_new(), *b = BN_new(), *c = BN_new(), *d = BN_new(), - *C = BN_new(), *D = BN_new(); - BN_RECP_CTX *recp = BN_RECP_CTX_new(); - BN_CTX *ctx = BN_CTX_new(); - - for (;;) { - BN_pseudo_rand(a, Rand(), 0, 0); - BN_pseudo_rand(b, Rand(), 0, 0); - if (BN_is_zero(b)) - continue; - - BN_RECP_CTX_set(recp, b, ctx); - if (BN_div(C, D, a, b, ctx) != 1) - bug("BN_div failed", a, b); - if (BN_div_recp(c, d, a, recp, ctx) != 1) - bug("BN_div_recp failed", a, b); - else if (BN_cmp(c, C) != 0 || BN_cmp(c, C) != 0) - bug("mismatch", a, b); - } -} diff --git a/crypto/bn/exp.c b/crypto/bn/exp.c deleted file mode 100644 index fbce28c5be9e..000000000000 --- a/crypto/bn/exp.c +++ /dev/null @@ -1,61 +0,0 @@ -/* unused */ - -#include <stdio.h> -#include <openssl/tmdiff.h> -#include "bn_lcl.h" - -#define SIZE 256 -#define NUM (8*8*8) -#define MOD (8*8*8*8*8) - -main(argc, argv) -int argc; -char *argv[]; -{ - BN_CTX ctx; - BIGNUM a, b, c, r, rr, t, l; - int j, i, size = SIZE, num = NUM, mod = MOD; - char *start, *end; - BN_MONT_CTX mont; - double d, md; - - BN_MONT_CTX_init(&mont); - BN_CTX_init(&ctx); - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&r); - - start = ms_time_new(); - end = ms_time_new(); - while (size <= 1024 * 8) { - BN_rand(&a, size, 0, 0); - BN_rand(&b, size, 1, 0); - BN_rand(&c, size, 0, 1); - - BN_mod(&a, &a, &c, &ctx); - - ms_time_get(start); - for (i = 0; i < 10; i++) - BN_MONT_CTX_set(&mont, &c, &ctx); - ms_time_get(end); - md = ms_time_diff(start, end); - - ms_time_get(start); - for (i = 0; i < num; i++) { - /* bn_mull(&r,&a,&b,&ctx); */ - /* BN_sqr(&r,&a,&ctx); */ - BN_mod_exp_mont(&r, &a, &b, &c, &ctx, &mont); - } - ms_time_get(end); - d = ms_time_diff(start, end) /* *50/33 */ ; - printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n", size, - d, num, d / num, (int)((d / num) * mod), md / 10.0); - num /= 8; - mod /= 8; - if (num <= 0) - num = 1; - size *= 2; - } - -} diff --git a/crypto/bn/expspeed.c b/crypto/bn/expspeed.c deleted file mode 100644 index 8ea980cdd201..000000000000 --- a/crypto/bn/expspeed.c +++ /dev/null @@ -1,381 +0,0 @@ -/* unused */ - -/* crypto/bn/expspeed.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* most of this code has been pilfered from my libdes speed.c program */ - -#define BASENUM 5000 -#define NUM_START 0 - -/* - * determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol, - * modular inverse, or modular square roots - */ -#define TEST_EXP -#undef TEST_MUL -#undef TEST_SQR -#undef TEST_GCD -#undef TEST_KRON -#undef TEST_INV -#undef TEST_SQRT -#define P_MOD_64 9 /* least significant 6 bits for prime to be - * used for BN_sqrt timings */ - -#if defined(TEST_EXP) + defined(TEST_MUL) + defined(TEST_SQR) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) +defined(TEST_SQRT) != 1 -# error "choose one test" -#endif - -#if defined(TEST_INV) || defined(TEST_SQRT) -# define C_PRIME -static void genprime_cb(int p, int n, void *arg); -#endif - -#undef PROG -#define PROG bnspeed_main - -#include <stdio.h> -#include <stdlib.h> -#include <signal.h> -#include <string.h> -#include <openssl/crypto.h> -#include <openssl/err.h> -#include <openssl/rand.h> - -#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX) -# define TIMES -#endif - -#ifndef _IRIX -# include <time.h> -#endif -#ifdef TIMES -# include <sys/types.h> -# include <sys/times.h> -#endif - -/* - * Depending on the VMS version, the tms structure is perhaps defined. The - * __TMS macro will show if it was. If it wasn't defined, we should undefine - * TIMES, since that tells the rest of the program how things should be - * handled. -- Richard Levitte - */ -#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS) -# undef TIMES -#endif - -#ifndef TIMES -# include <sys/timeb.h> -#endif - -#if defined(sun) || defined(__ultrix) -# define _POSIX_SOURCE -# include <limits.h> -# include <sys/param.h> -#endif - -#include <openssl/bn.h> -#include <openssl/x509.h> - -/* The following if from times(3) man page. It may need to be changed */ -#ifndef HZ -# ifndef CLK_TCK -# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */ -# define HZ 100.0 -# else /* _BSD_CLK_TCK_ */ -# define HZ ((double)_BSD_CLK_TCK_) -# endif -# else /* CLK_TCK */ -# define HZ ((double)CLK_TCK) -# endif -#endif - -#undef BUFSIZE -#define BUFSIZE ((long)1024*8) -int run = 0; - -static double Time_F(int s); -#define START 0 -#define STOP 1 - -static double Time_F(int s) -{ - double ret; -#ifdef TIMES - static struct tms tstart, tend; - - if (s == START) { - times(&tstart); - return (0); - } else { - times(&tend); - ret = ((double)(tend.tms_utime - tstart.tms_utime)) / HZ; - return ((ret < 1e-3) ? 1e-3 : ret); - } -#else /* !times() */ - static struct timeb tstart, tend; - long i; - - if (s == START) { - ftime(&tstart); - return (0); - } else { - ftime(&tend); - i = (long)tend.millitm - (long)tstart.millitm; - ret = ((double)(tend.time - tstart.time)) + ((double)i) / 1000.0; - return ((ret < 0.001) ? 0.001 : ret); - } -#endif -} - -#define NUM_SIZES 7 -#if NUM_START > NUM_SIZES -# error "NUM_START > NUM_SIZES" -#endif -static int sizes[NUM_SIZES] = { 128, 256, 512, 1024, 2048, 4096, 8192 }; - -static int mul_c[NUM_SIZES] = - { 8 * 8 * 8 * 8 * 8 * 8, 8 * 8 * 8 * 8 * 8, 8 * 8 * 8 * 8, 8 * 8 * 8, - 8 * 8, 8, 1 -}; - -/* - * static int sizes[NUM_SIZES]={59,179,299,419,539}; - */ - -#define RAND_SEED(string) { const char str[] = string; RAND_seed(string, sizeof(str)); } - -void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx); - -int main(int argc, char **argv) -{ - BN_CTX *ctx; - BIGNUM *a, *b, *c, *r; - -#if 1 - if (!CRYPTO_set_mem_debug_functions(0, 0, 0, 0, 0)) - abort(); -#endif - - ctx = BN_CTX_new(); - a = BN_new(); - b = BN_new(); - c = BN_new(); - r = BN_new(); - - while (!RAND_status()) - /* not enough bits */ - RAND_SEED("I demand a manual recount!"); - - do_mul_exp(r, a, b, c, ctx); - return 0; -} - -void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx) -{ - int i, k; - double tm; - long num; - - num = BASENUM; - for (i = NUM_START; i < NUM_SIZES; i++) { -#ifdef C_PRIME -# ifdef TEST_SQRT - if (!BN_set_word(a, 64)) - goto err; - if (!BN_set_word(b, P_MOD_64)) - goto err; -# define ADD a -# define REM b -# else -# define ADD NULL -# define REM NULL -# endif - if (!BN_generate_prime(c, sizes[i], 0, ADD, REM, genprime_cb, NULL)) - goto err; - putc('\n', stderr); - fflush(stderr); -#endif - - for (k = 0; k < num; k++) { - if (k % 50 == 0) { /* Average over num/50 different choices of - * random numbers. */ - if (!BN_pseudo_rand(a, sizes[i], 1, 0)) - goto err; - - if (!BN_pseudo_rand(b, sizes[i], 1, 0)) - goto err; - -#ifndef C_PRIME - if (!BN_pseudo_rand(c, sizes[i], 1, 1)) - goto err; -#endif - -#ifdef TEST_SQRT - if (!BN_mod_sqr(a, a, c, ctx)) - goto err; - if (!BN_mod_sqr(b, b, c, ctx)) - goto err; -#else - if (!BN_nnmod(a, a, c, ctx)) - goto err; - if (!BN_nnmod(b, b, c, ctx)) - goto err; -#endif - - if (k == 0) - Time_F(START); - } -#if defined(TEST_EXP) - if (!BN_mod_exp(r, a, b, c, ctx)) - goto err; -#elif defined(TEST_MUL) - { - int i = 0; - for (i = 0; i < 50; i++) - if (!BN_mod_mul(r, a, b, c, ctx)) - goto err; - } -#elif defined(TEST_SQR) - { - int i = 0; - for (i = 0; i < 50; i++) { - if (!BN_mod_sqr(r, a, c, ctx)) - goto err; - if (!BN_mod_sqr(r, b, c, ctx)) - goto err; - } - } -#elif defined(TEST_GCD) - if (!BN_gcd(r, a, b, ctx)) - goto err; - if (!BN_gcd(r, b, c, ctx)) - goto err; - if (!BN_gcd(r, c, a, ctx)) - goto err; -#elif defined(TEST_KRON) - if (-2 == BN_kronecker(a, b, ctx)) - goto err; - if (-2 == BN_kronecker(b, c, ctx)) - goto err; - if (-2 == BN_kronecker(c, a, ctx)) - goto err; -#elif defined(TEST_INV) - if (!BN_mod_inverse(r, a, c, ctx)) - goto err; - if (!BN_mod_inverse(r, b, c, ctx)) - goto err; -#else /* TEST_SQRT */ - if (!BN_mod_sqrt(r, a, c, ctx)) - goto err; - if (!BN_mod_sqrt(r, b, c, ctx)) - goto err; -#endif - } - tm = Time_F(STOP); - printf( -#if defined(TEST_EXP) - "modexp %4d ^ %4d %% %4d" -#elif defined(TEST_MUL) - "50*modmul %4d %4d %4d" -#elif defined(TEST_SQR) - "100*modsqr %4d %4d %4d" -#elif defined(TEST_GCD) - "3*gcd %4d %4d %4d" -#elif defined(TEST_KRON) - "3*kronecker %4d %4d %4d" -#elif defined(TEST_INV) - "2*inv %4d %4d mod %4d" -#else /* TEST_SQRT */ - "2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d" -#endif - " -> %8.6fms %5.1f (%ld)\n", -#ifdef TEST_SQRT - P_MOD_64, -#endif - sizes[i], sizes[i], sizes[i], tm * 1000.0 / num, - tm * mul_c[i] / num, num); - num /= 7; - if (num <= 0) - num = 1; - } - return; - - err: - ERR_print_errors_fp(stderr); -} - -#ifdef C_PRIME -static void genprime_cb(int p, int n, void *arg) -{ - char c = '*'; - - if (p == 0) - c = '.'; - if (p == 1) - c = '+'; - if (p == 2) - c = '*'; - if (p == 3) - c = '\n'; - putc(c, stderr); - fflush(stderr); - (void)n; - (void)arg; -} -#endif diff --git a/crypto/bn/exptest.c b/crypto/bn/exptest.c deleted file mode 100644 index 779ee902036c..000000000000 --- a/crypto/bn/exptest.c +++ /dev/null @@ -1,315 +0,0 @@ -/* crypto/bn/exptest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "../e_os.h" - -#include <openssl/bio.h> -#include <openssl/bn.h> -#include <openssl/rand.h> -#include <openssl/err.h> - -#define NUM_BITS (BN_BITS*2) - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -/* - * Test that r == 0 in test_exp_mod_zero(). Returns one on success, - * returns zero and prints debug output otherwise. - */ -static int a_is_zero_mod_one(const char *method, const BIGNUM *r, - const BIGNUM *a) { - if (!BN_is_zero(r)) { - fprintf(stderr, "%s failed:\n", method); - fprintf(stderr, "a ** 0 mod 1 = r (should be 0)\n"); - fprintf(stderr, "a = "); - BN_print_fp(stderr, a); - fprintf(stderr, "\nr = "); - BN_print_fp(stderr, r); - fprintf(stderr, "\n"); - return 0; - } - return 1; -} - -/* - * test_exp_mod_zero tests that x**0 mod 1 == 0. It returns zero on success. - */ -static int test_exp_mod_zero() -{ - BIGNUM a, p, m; - BIGNUM r; - BN_ULONG one_word = 1; - BN_CTX *ctx = BN_CTX_new(); - int ret = 1, failed = 0; - - BN_init(&m); - BN_one(&m); - - BN_init(&a); - BN_one(&a); - - BN_init(&p); - BN_zero(&p); - - BN_init(&r); - - if (!BN_rand(&a, 1024, 0, 0)) - goto err; - - if (!BN_mod_exp(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp", &r, &a)) - failed = 1; - - if (!BN_mod_exp_recp(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_recp", &r, &a)) - failed = 1; - - if (!BN_mod_exp_simple(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_simple", &r, &a)) - failed = 1; - - if (!BN_mod_exp_mont(&r, &a, &p, &m, ctx, NULL)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_mont", &r, &a)) - failed = 1; - - if (!BN_mod_exp_mont_consttime(&r, &a, &p, &m, ctx, NULL)) { - goto err; - } - - if (!a_is_zero_mod_one("BN_mod_exp_mont_consttime", &r, &a)) - failed = 1; - - /* - * A different codepath exists for single word multiplication - * in non-constant-time only. - */ - if (!BN_mod_exp_mont_word(&r, one_word, &p, &m, ctx, NULL)) - goto err; - - if (!BN_is_zero(&r)) { - fprintf(stderr, "BN_mod_exp_mont_word failed:\n"); - fprintf(stderr, "1 ** 0 mod 1 = r (should be 0)\n"); - fprintf(stderr, "r = "); - BN_print_fp(stderr, &r); - fprintf(stderr, "\n"); - return 0; - } - - ret = failed; - - err: - BN_free(&r); - BN_free(&a); - BN_free(&p); - BN_free(&m); - BN_CTX_free(ctx); - - return ret; -} - -int main(int argc, char *argv[]) -{ - BN_CTX *ctx; - BIO *out = NULL; - int i, ret; - unsigned char c; - BIGNUM *r_mont, *r_mont_const, *r_recp, *r_simple, *a, *b, *m; - - /* - * Seed or BN_rand may fail, and we don't even check its return - * value (which we should) - */ - RAND_seed(rnd_seed, sizeof(rnd_seed)); - - ERR_load_BN_strings(); - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - r_mont = BN_new(); - r_mont_const = BN_new(); - r_recp = BN_new(); - r_simple = BN_new(); - a = BN_new(); - b = BN_new(); - m = BN_new(); - if ((r_mont == NULL) || (r_recp == NULL) || (a == NULL) || (b == NULL)) - goto err; - - out = BIO_new(BIO_s_file()); - - if (out == NULL) - EXIT(1); - BIO_set_fp(out, stdout, BIO_NOCLOSE); - - for (i = 0; i < 200; i++) { - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(a, NUM_BITS + c, 0, 0); - - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(b, NUM_BITS + c, 0, 0); - - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(m, NUM_BITS + c, 0, 1); - - BN_mod(a, a, m, ctx); - BN_mod(b, b, m, ctx); - - ret = BN_mod_exp_mont(r_mont, a, b, m, ctx, NULL); - if (ret <= 0) { - printf("BN_mod_exp_mont() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_recp(r_recp, a, b, m, ctx); - if (ret <= 0) { - printf("BN_mod_exp_recp() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_simple(r_simple, a, b, m, ctx); - if (ret <= 0) { - printf("BN_mod_exp_simple() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_mont_consttime(r_mont_const, a, b, m, ctx, NULL); - if (ret <= 0) { - printf("BN_mod_exp_mont_consttime() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - if (BN_cmp(r_simple, r_mont) == 0 - && BN_cmp(r_simple, r_recp) == 0 - && BN_cmp(r_simple, r_mont_const) == 0) { - printf("."); - fflush(stdout); - } else { - if (BN_cmp(r_simple, r_mont) != 0) - printf("\nsimple and mont results differ\n"); - if (BN_cmp(r_simple, r_mont_const) != 0) - printf("\nsimple and mont const time results differ\n"); - if (BN_cmp(r_simple, r_recp) != 0) - printf("\nsimple and recp results differ\n"); - - printf("a (%3d) = ", BN_num_bits(a)); - BN_print(out, a); - printf("\nb (%3d) = ", BN_num_bits(b)); - BN_print(out, b); - printf("\nm (%3d) = ", BN_num_bits(m)); - BN_print(out, m); - printf("\nsimple ="); - BN_print(out, r_simple); - printf("\nrecp ="); - BN_print(out, r_recp); - printf("\nmont ="); - BN_print(out, r_mont); - printf("\nmont_ct ="); - BN_print(out, r_mont_const); - printf("\n"); - EXIT(1); - } - } - BN_free(r_mont); - BN_free(r_mont_const); - BN_free(r_recp); - BN_free(r_simple); - BN_free(a); - BN_free(b); - BN_free(m); - BN_CTX_free(ctx); - ERR_remove_thread_state(NULL); - CRYPTO_mem_leaks(out); - BIO_free(out); - printf("\n"); - - if (test_exp_mod_zero() != 0) - goto err; - - printf("done\n"); - - EXIT(0); - err: - ERR_load_crypto_strings(); - ERR_print_errors(out); -#ifdef OPENSSL_SYS_NETWARE - printf("ERROR\n"); -#endif - EXIT(1); - return (1); -} diff --git a/crypto/bn/rsaz_exp.c b/crypto/bn/rsaz_exp.c index c54c6feb51b5..22455b8a6374 100644 --- a/crypto/bn/rsaz_exp.c +++ b/crypto/bn/rsaz_exp.c @@ -1,48 +1,23 @@ -/***************************************************************************** -* * -* Copyright (c) 2012, Intel Corporation * -* * -* All rights reserved. * -* * -* Redistribution and use in source and binary forms, with or without * -* modification, are permitted provided that the following conditions are * -* met: * -* * -* * Redistributions of source code must retain the above copyright * -* notice, this list of conditions and the following disclaimer. * -* * -* * Redistributions in binary form must reproduce the above copyright * -* notice, this list of conditions and the following disclaimer in the * -* documentation and/or other materials provided with the * -* distribution. * -* * -* * Neither the name of the Intel Corporation nor the names of its * -* contributors may be used to endorse or promote products derived from * -* this software without specific prior written permission. * -* * -* * -* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * -* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * -* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * -* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * -* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * -* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * -* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * -* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * -* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * -* * -****************************************************************************** -* Developers and authors: * -* Shay Gueron (1, 2), and Vlad Krasnov (1) * -* (1) Intel Corporation, Israel Development Center, Haifa, Israel * -* (2) University of Haifa, Israel * -*****************************************************************************/ +/* + * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2012, Intel Corporation. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) + * (1) Intel Corporation, Israel Development Center, Haifa, Israel + * (2) University of Haifa, Israel + */ +#include <openssl/opensslconf.h> #include "rsaz_exp.h" -#ifdef RSAZ_ENABLED +#ifndef RSAZ_ENABLED +NON_EMPTY_TRANSLATION_UNIT +#else /* * See crypto/bn/asm/rsaz-avx2.pl for further details. @@ -241,7 +216,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], rsaz_1024_sqr_avx2(result, result, m, k0, 5); - wvalue = *((unsigned short *)&p_str[index / 8]); + wvalue = (p_str[(index / 8) + 1] << 8) | p_str[index / 8]; wvalue = (wvalue >> (index % 8)) & 31; index -= 5; @@ -337,10 +312,4 @@ void RSAZ_512_mod_exp(BN_ULONG result[8], OPENSSL_cleanse(storage, sizeof(storage)); } -#else - -# if defined(PEDANTIC) || defined(__DECC) || defined(__clang__) -static void *dummy = &dummy; -# endif - #endif diff --git a/crypto/bn/rsaz_exp.h b/crypto/bn/rsaz_exp.h index 229e181f67b5..c5864f8aaa8c 100644 --- a/crypto/bn/rsaz_exp.h +++ b/crypto/bn/rsaz_exp.h @@ -1,44 +1,16 @@ -/***************************************************************************** -* * -* Copyright (c) 2012, Intel Corporation * -* * -* All rights reserved. * -* * -* Redistribution and use in source and binary forms, with or without * -* modification, are permitted provided that the following conditions are * -* met: * -* * -* * Redistributions of source code must retain the above copyright * -* notice, this list of conditions and the following disclaimer. * -* * -* * Redistributions in binary form must reproduce the above copyright * -* notice, this list of conditions and the following disclaimer in the * -* documentation and/or other materials provided with the * -* distribution. * -* * -* * Neither the name of the Intel Corporation nor the names of its * -* contributors may be used to endorse or promote products derived from * -* this software without specific prior written permission. * -* * -* * -* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * -* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * -* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * -* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * -* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * -* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * -* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * -* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * -* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * -* * -****************************************************************************** -* Developers and authors: * -* Shay Gueron (1, 2), and Vlad Krasnov (1) * -* (1) Intel Corporation, Israel Development Center, Haifa, Israel * -* (2) University of Haifa, Israel * -*****************************************************************************/ +/* + * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2012, Intel Corporation. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) + * (1) Intel Corporation, Israel Development Center, Haifa, Israel + * (2) University of Haifa, Israel + */ #ifndef RSAZ_EXP_H # define RSAZ_EXP_H @@ -56,7 +28,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG exponent[16], const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0); -int rsaz_avx2_eligible(); +int rsaz_avx2_eligible(void); void RSAZ_512_mod_exp(BN_ULONG result[8], const BN_ULONG base_norm[8], const BN_ULONG exponent[8], diff --git a/crypto/bn/todo b/crypto/bn/todo deleted file mode 100644 index e47e381aea13..000000000000 --- a/crypto/bn/todo +++ /dev/null @@ -1,3 +0,0 @@ -Cache RECP_CTX values -make the result argument independant of the inputs. -split up the _exp_ functions |