diff options
Diffstat (limited to 'sys/crypto/aesni')
-rw-r--r-- | sys/crypto/aesni/aesencdec.h | 136 | ||||
-rw-r--r-- | sys/crypto/aesni/aesencdec_amd64.S | 135 | ||||
-rw-r--r-- | sys/crypto/aesni/aesencdec_i386.S | 166 | ||||
-rw-r--r-- | sys/crypto/aesni/aeskeys_amd64.S | 96 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni.c | 8 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni.h | 15 | ||||
-rw-r--r-- | sys/crypto/aesni/aesni_wrap.c | 225 |
7 files changed, 359 insertions, 422 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h new file mode 100644 index 000000000000..0c9bf5f21628 --- /dev/null +++ b/sys/crypto/aesni/aesencdec.h @@ -0,0 +1,136 @@ +/*- + * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <wmmintrin.h> + +static inline void +aesni_enc8(int rounds, const uint8_t *key_schedule, __m128i a, + __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g, + __m128i h, __m128i out[8]) +{ + const __m128i *keysched = (const __m128i *)key_schedule; + int i; + + a ^= keysched[0]; + b ^= keysched[0]; + c ^= keysched[0]; + d ^= keysched[0]; + e ^= keysched[0]; + f ^= keysched[0]; + g ^= keysched[0]; + h ^= keysched[0]; + + for (i = 0; i < rounds; i++) { + a = _mm_aesenc_si128(a, keysched[i + 1]); + b = _mm_aesenc_si128(b, keysched[i + 1]); + c = _mm_aesenc_si128(c, keysched[i + 1]); + d = _mm_aesenc_si128(d, keysched[i + 1]); + e = _mm_aesenc_si128(e, keysched[i + 1]); + f = _mm_aesenc_si128(f, keysched[i + 1]); + g = _mm_aesenc_si128(g, keysched[i + 1]); + h = _mm_aesenc_si128(h, keysched[i + 1]); + } + + out[0] = _mm_aesenclast_si128(a, keysched[i + 1]); + out[1] = _mm_aesenclast_si128(b, keysched[i + 1]); + out[2] = _mm_aesenclast_si128(c, keysched[i + 1]); + out[3] = _mm_aesenclast_si128(d, keysched[i + 1]); + out[4] = _mm_aesenclast_si128(e, keysched[i + 1]); + out[5] = _mm_aesenclast_si128(f, keysched[i + 1]); + out[6] = _mm_aesenclast_si128(g, keysched[i + 1]); + out[7] = _mm_aesenclast_si128(h, keysched[i + 1]); +} + +static inline void +aesni_dec8(int rounds, const uint8_t *key_schedule, __m128i a, + __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g, + __m128i h, __m128i out[8]) +{ + const __m128i *keysched = (const __m128i *)key_schedule; + int i; + + a ^= keysched[0]; + b ^= keysched[0]; + c ^= keysched[0]; + d ^= keysched[0]; + e ^= keysched[0]; + f ^= keysched[0]; + g ^= keysched[0]; + h ^= keysched[0]; + + for (i = 0; i < rounds; i++) { + a = _mm_aesdec_si128(a, keysched[i + 1]); + b = _mm_aesdec_si128(b, keysched[i + 1]); + c = _mm_aesdec_si128(c, keysched[i + 1]); + d = _mm_aesdec_si128(d, keysched[i + 1]); + e = _mm_aesdec_si128(e, keysched[i + 1]); + f = _mm_aesdec_si128(f, keysched[i + 1]); + g = _mm_aesdec_si128(g, keysched[i + 1]); + h = _mm_aesdec_si128(h, keysched[i + 1]); + } + + out[0] = _mm_aesdeclast_si128(a, keysched[i + 1]); + out[1] = _mm_aesdeclast_si128(b, keysched[i + 1]); + out[2] = _mm_aesdeclast_si128(c, keysched[i + 1]); + out[3] = _mm_aesdeclast_si128(d, keysched[i + 1]); + out[4] = _mm_aesdeclast_si128(e, keysched[i + 1]); + out[5] = _mm_aesdeclast_si128(f, keysched[i + 1]); + out[6] = _mm_aesdeclast_si128(g, keysched[i + 1]); + out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]); +} + +static inline __m128i +aesni_enc(int rounds, const uint8_t *key_schedule, const __m128i from) +{ + __m128i tmp; + const __m128i *keysched = (const __m128i *)key_schedule; + int i; + + tmp = from ^ keysched[0]; + + for (i = 0; i < rounds; i++) + tmp = _mm_aesenc_si128(tmp, keysched[i + 1]); + + return _mm_aesenclast_si128(tmp, keysched[i + 1]); +} + +static inline __m128i +aesni_dec(int rounds, const uint8_t *key_schedule, const __m128i from) +{ + __m128i tmp; + const __m128i *keysched = (const __m128i *)key_schedule; + int i; + + tmp = from ^ keysched[0]; + + for (i = 0; i < rounds; i++) + tmp = _mm_aesdec_si128(tmp, keysched[i + 1]); + + return _mm_aesdeclast_si128(tmp, keysched[i + 1]); +} diff --git a/sys/crypto/aesni/aesencdec_amd64.S b/sys/crypto/aesni/aesencdec_amd64.S deleted file mode 100644 index f77918b8e9ad..000000000000 --- a/sys/crypto/aesni/aesencdec_amd64.S +++ /dev/null @@ -1,135 +0,0 @@ -/*- - * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <machine/asmacros.h> - - .text - -ENTRY(aesni_enc) - .cfi_startproc - movdqu (%rdx),%xmm0 - cmpq $0,%r8 - je 1f - movdqu (%r8),%xmm1 /* unaligned load into reg */ - pxor %xmm1,%xmm0 /* pxor otherwise can fault on iv */ -1: - pxor (%rsi),%xmm0 -2: - addq $0x10,%rsi -// aesenc (%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdc,0x06 - decl %edi - jne 2b - addq $0x10,%rsi -// aesenclast (%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdd,0x06 - movdqu %xmm0,(%rcx) - retq - .cfi_endproc -END(aesni_enc) - -ENTRY(aesni_dec) - .cfi_startproc - movdqu (%rdx),%xmm0 - pxor (%rsi),%xmm0 -1: - addq $0x10,%rsi -// aesdec (%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x06 - decl %edi - jne 1b - addq $0x10,%rsi -// aesdeclast (%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x06 - cmpq $0,%r8 - je 2f - movdqu (%r8),%xmm1 - pxor %xmm1,%xmm0 -2: - movdqu %xmm0,(%rcx) - retq - .cfi_endproc -END(aesni_dec) - -ENTRY(aesni_decrypt_cbc) - .cfi_startproc - shrq $4,%rdx - movdqu (%r8),%xmm1 -1: - movdqu (%rcx),%xmm0 - movdqa %xmm0,%xmm2 - pxor (%rsi),%xmm0 - cmpl $12,%edi -// aesdec 0x10(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x10 -// aesdec 0x20(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x20 -// aesdec 0x30(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x30 -// aesdec 0x40(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x40 -// aesdec 0x50(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x50 -// aesdec 0x60(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x60 -// aesdec 0x70(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x46,0x70 -// aesdec 0x80(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0x80,0x00,0x00,0x00 -// aesdec 0x90(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0x90,0x00,0x00,0x00 - jge 2f -// aesdeclast 0xa0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x86,0xa0,0x00,0x00,0x00 - jmp 4f -2: -// aesdec 0xa0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0xa0,0x00,0x00,0x00 -// aesdec 0xb0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0xb0,0x00,0x00,0x00 - jg 3f -// aesdeclast 0xc0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x86,0xc0,0x00,0x00,0x00 - jmp 4f -3: -// aesdec 0xc0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0xc0,0x00,0x00,0x00 -// aesdec 0xd0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x86,0xd0,0x00,0x00,0x00 -// aesdeclast 0xe0(%rsi),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x86,0xe0,0x00,0x00,0x00 -4: - pxor %xmm1,%xmm0 - movdqu %xmm0,(%rcx) - movdqa %xmm2,%xmm1 // iv - addq $0x10,%rcx - decq %rdx - jne 1b - retq - .cfi_endproc -END(aesni_decrypt_cbc) - - .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aesencdec_i386.S b/sys/crypto/aesni/aesencdec_i386.S deleted file mode 100644 index 78de311f23c9..000000000000 --- a/sys/crypto/aesni/aesencdec_i386.S +++ /dev/null @@ -1,166 +0,0 @@ -/*- - * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <machine/asmacros.h> - -ENTRY(aesni_enc) - .cfi_startproc - pushl %ebp - .cfi_adjust_cfa_offset 4 - movl %esp,%ebp - movl 8(%ebp),%ecx /* rounds */ - movl 16(%ebp),%edx - movdqu (%edx),%xmm0 /* from */ - movl 24(%ebp),%eax /* iv */ - cmpl $0,%eax - je 1f - movdqu (%eax),%xmm1 - pxor %xmm1,%xmm0 -1: - movl 12(%ebp),%eax /* key */ - pxor (%eax),%xmm0 -2: - addl $0x10,%eax -// aesenc (%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdc,0x00 - loopne 2b - addl $0x10,%eax -// aesenclast (%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdd,0x00 - movl 20(%ebp),%eax - movdqu %xmm0,(%eax) /* to */ - leave - .cfi_adjust_cfa_offset -4 - retl - .cfi_endproc -END(aesni_enc) - -ENTRY(aesni_dec) - .cfi_startproc - pushl %ebp - .cfi_adjust_cfa_offset 4 - movl %esp,%ebp - movl 8(%ebp),%ecx /* rounds */ - movl 16(%ebp),%edx - movdqu (%edx),%xmm0 /* from */ - movl 12(%ebp),%eax /* key */ - pxor (%eax),%xmm0 -1: - addl $0x10,%eax -// aesdec (%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x00 - loopne 1b - addl $0x10,%eax -// aesdeclast (%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x00 - movl 24(%ebp),%eax - cmpl $0,%eax /* iv */ - je 2f - movdqu (%eax),%xmm1 - pxor %xmm1,%xmm0 -2: - movl 20(%ebp),%eax - movdqu %xmm0,(%eax) /* to */ - leave - .cfi_adjust_cfa_offset -4 - retl - .cfi_endproc -END(aesni_dec) - -ENTRY(aesni_decrypt_cbc) - .cfi_startproc - pushl %ebp - .cfi_adjust_cfa_offset 4 - movl %esp,%ebp - pushl %ebx - pushl %esi - movl 12(%ebp),%eax /* key */ - movl 16(%ebp),%ecx /* length */ - shrl $4,%ecx - movl 20(%ebp),%ebx /* buf */ - movl 24(%ebp),%esi - movdqu (%esi),%xmm1 /* iv */ - movl 8(%ebp),%esi /* rounds */ -1: - movdqu (%ebx),%xmm0 - movdqa %xmm0,%xmm2 - pxor (%eax),%xmm0 - cmpl $12,%esi -// aesdec 0x10(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x10 -// aesdec 0x20(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x20 -// aesdec 0x30(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x30 -// aesdec 0x40(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x40 -// aesdec 0x50(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x50 -// aesdec 0x60(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x60 -// aesdec 0x70(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x40,0x70 -// aesdec 0x80(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0x80,0x00,0x00,0x00 -// aesdec 0x90(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0x90,0x00,0x00,0x00 - jge 2f -// aesdeclast 0xa0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x80,0xa0,0x00,0x00,0x00 - jmp 4f -2: -// aesdec 0xa0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0xa0,0x00,0x00,0x00 -// aesdec 0xb0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0xb0,0x00,0x00,0x00 - jg 3f -// aesdeclast 0xc0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x80,0xc0,0x00,0x00,0x00 - jmp 4f -3: -// aesdec 0xc0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0xc0,0x00,0x00,0x00 -// aesdec 0xd0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xde,0x80,0xd0,0x00,0x00,0x00 -// aesdeclast 0xe0(%eax),%xmm0 - .byte 0x66,0x0f,0x38,0xdf,0x80,0xe0,0x00,0x00,0x00 -4: - pxor %xmm1,%xmm0 - movdqu %xmm0,(%ebx) - movdqa %xmm2,%xmm1 - addl $0x10,%ebx - decl %ecx - jne 1b - - popl %esi - popl %ebx - leave - .cfi_adjust_cfa_offset -4 - retl - .cfi_endproc -END(aesni_decrypt_cbc) - - .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aeskeys_amd64.S b/sys/crypto/aesni/aeskeys_amd64.S index 23a4d3dc1fba..9b3e98c8437e 100644 --- a/sys/crypto/aesni/aeskeys_amd64.S +++ b/sys/crypto/aesni/aeskeys_amd64.S @@ -125,103 +125,72 @@ ENTRY(aesni_set_enckey) movups 0x10(%rdi),%xmm2 # other user key movaps %xmm2,(%rsi) addq $0x10,%rsi -// aeskeygenassist $0x1,%xmm2,%xmm1 # round 1 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x01 + aeskeygenassist $0x1,%xmm2,%xmm1 # round 1 call _key_expansion_256a -// aeskeygenassist $0x1,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x01 + aeskeygenassist $0x1,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x2,%xmm2,%xmm1 # round 2 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x02 + aeskeygenassist $0x2,%xmm2,%xmm1 # round 2 call _key_expansion_256a -// aeskeygenassist $0x2,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x02 + aeskeygenassist $0x2,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x4,%xmm2,%xmm1 # round 3 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x04 + aeskeygenassist $0x4,%xmm2,%xmm1 # round 3 call _key_expansion_256a -// aeskeygenassist $0x4,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x04 + aeskeygenassist $0x4,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x8,%xmm2,%xmm1 # round 4 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x08 + aeskeygenassist $0x8,%xmm2,%xmm1 # round 4 call _key_expansion_256a -// aeskeygenassist $0x8,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x08 + aeskeygenassist $0x8,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x10,%xmm2,%xmm1 # round 5 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x10 + aeskeygenassist $0x10,%xmm2,%xmm1 # round 5 call _key_expansion_256a -// aeskeygenassist $0x10,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x10 + aeskeygenassist $0x10,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x20,%xmm2,%xmm1 # round 6 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x20 + aeskeygenassist $0x20,%xmm2,%xmm1 # round 6 call _key_expansion_256a -// aeskeygenassist $0x20,%xmm0,%xmm1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x20 + aeskeygenassist $0x20,%xmm0,%xmm1 call _key_expansion_256b -// aeskeygenassist $0x40,%xmm2,%xmm1 # round 7 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x40 + aeskeygenassist $0x40,%xmm2,%xmm1 # round 7 call _key_expansion_256a retq .Lenc_key192: movq 0x10(%rdi),%xmm2 # other user key -// aeskeygenassist $0x1,%xmm2,%xmm1 # round 1 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x01 + aeskeygenassist $0x1,%xmm2,%xmm1 # round 1 call _key_expansion_192a -// aeskeygenassist $0x2,%xmm2,%xmm1 # round 2 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x02 + aeskeygenassist $0x2,%xmm2,%xmm1 # round 2 call _key_expansion_192b -// aeskeygenassist $0x4,%xmm2,%xmm1 # round 3 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x04 + aeskeygenassist $0x4,%xmm2,%xmm1 # round 3 call _key_expansion_192a -// aeskeygenassist $0x8,%xmm2,%xmm1 # round 4 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x08 + aeskeygenassist $0x8,%xmm2,%xmm1 # round 4 call _key_expansion_192b -// aeskeygenassist $0x10,%xmm2,%xmm1 # round 5 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x10 + aeskeygenassist $0x10,%xmm2,%xmm1 # round 5 call _key_expansion_192a -// aeskeygenassist $0x20,%xmm2,%xmm1 # round 6 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x20 + aeskeygenassist $0x20,%xmm2,%xmm1 # round 6 call _key_expansion_192b -// aeskeygenassist $0x40,%xmm2,%xmm1 # round 7 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x40 + aeskeygenassist $0x40,%xmm2,%xmm1 # round 7 call _key_expansion_192a -// aeskeygenassist $0x80,%xmm2,%xmm1 # round 8 - .byte 0x66,0x0f,0x3a,0xdf,0xca,0x80 + aeskeygenassist $0x80,%xmm2,%xmm1 # round 8 call _key_expansion_192b retq .Lenc_key128: -// aeskeygenassist $0x1,%xmm0,%xmm1 # round 1 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x01 + aeskeygenassist $0x1,%xmm0,%xmm1 # round 1 call _key_expansion_128 -// aeskeygenassist $0x2,%xmm0,%xmm1 # round 2 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x02 + aeskeygenassist $0x2,%xmm0,%xmm1 # round 2 call _key_expansion_128 -// aeskeygenassist $0x4,%xmm0,%xmm1 # round 3 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x04 + aeskeygenassist $0x4,%xmm0,%xmm1 # round 3 call _key_expansion_128 -// aeskeygenassist $0x8,%xmm0,%xmm1 # round 4 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x08 + aeskeygenassist $0x8,%xmm0,%xmm1 # round 4 call _key_expansion_128 -// aeskeygenassist $0x10,%xmm0,%xmm1 # round 5 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x10 + aeskeygenassist $0x10,%xmm0,%xmm1 # round 5 call _key_expansion_128 -// aeskeygenassist $0x20,%xmm0,%xmm1 # round 6 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x20 + aeskeygenassist $0x20,%xmm0,%xmm1 # round 6 call _key_expansion_128 -// aeskeygenassist $0x40,%xmm0,%xmm1 # round 7 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x40 + aeskeygenassist $0x40,%xmm0,%xmm1 # round 7 call _key_expansion_128 -// aeskeygenassist $0x80,%xmm0,%xmm1 # round 8 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x80 + aeskeygenassist $0x80,%xmm0,%xmm1 # round 8 call _key_expansion_128 -// aeskeygenassist $0x1b,%xmm0,%xmm1 # round 9 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x1b + aeskeygenassist $0x1b,%xmm0,%xmm1 # round 9 call _key_expansion_128 -// aeskeygenassist $0x36,%xmm0,%xmm1 # round 10 - .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x36 + aeskeygenassist $0x36,%xmm0,%xmm1 # round 10 call _key_expansion_128 retq .cfi_endproc @@ -238,8 +207,7 @@ ENTRY(aesni_set_deckey) 1: addq $0x10,%rsi subq $0x10,%rdi -// aesimc (%rdi),%xmm1 - .byte 0x66,0x0f,0x38,0xdb,0x0f + aesimc (%rdi),%xmm1 movdqa %xmm1,(%rsi) decl %edx jne 1b diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c index ca00a578cad5..73eb28a16e36 100644 --- a/sys/crypto/aesni/aesni.c +++ b/sys/crypto/aesni/aesni.c @@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/uio.h> #include <crypto/aesni/aesni.h> -#include "cryptodev_if.h" +#include <cryptodev_if.h> struct aesni_softc { int32_t cid; @@ -74,6 +74,12 @@ aesni_probe(device_t dev) device_printf(dev, "No AESNI support.\n"); return (EINVAL); } + + if ((cpu_feature & CPUID_SSE2) == 0) { + device_printf(dev, "No SSE2 support but AESNI!?!\n"); + return (EINVAL); + } + device_set_desc_copy(dev, "AES-CBC,AES-XTS"); return (0); } diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h index 78255b779a38..17ca9c5bf7f4 100644 --- a/sys/crypto/aesni/aesni.h +++ b/sys/crypto/aesni/aesni.h @@ -71,12 +71,6 @@ struct aesni_session { /* * Internal functions, implemented in assembler. */ -void aesni_enc(int rounds, const uint8_t *key_schedule, - const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], - const uint8_t iv[AES_BLOCK_LEN]); -void aesni_dec(int rounds, const uint8_t *key_schedule, - const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], - const uint8_t iv[AES_BLOCK_LEN]); void aesni_set_enckey(const uint8_t *userkey, uint8_t *encrypt_schedule, int number_of_rounds); void aesni_set_deckey(const uint8_t *encrypt_schedule, @@ -88,12 +82,19 @@ void aesni_set_deckey(const uint8_t *encrypt_schedule, void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); void aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, - const uint8_t *from, const uint8_t iv[AES_BLOCK_LEN]); + uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]); void aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); +void aesni_encrypt_xts(int rounds, const void *data_schedule, + const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, + const uint8_t iv[AES_BLOCK_LEN]); +void aesni_decrypt_xts(int rounds, const void *data_schedule, + const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, + const uint8_t iv[AES_BLOCK_LEN]); + int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini); int aesni_cipher_process(struct aesni_session *ses, diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c index 3340b1fcb6c0..197baf7f0778 100644 --- a/sys/crypto/aesni/aesni_wrap.c +++ b/sys/crypto/aesni/aesni_wrap.c @@ -2,6 +2,7 @@ * Copyright (C) 2008 Damien Miller <djm@mindrot.org> * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> + * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,13 +29,15 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); - + #include <sys/param.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/proc.h> #include <sys/systm.h> #include <crypto/aesni/aesni.h> + +#include "aesencdec.h" MALLOC_DECLARE(M_AESNI); @@ -42,28 +45,78 @@ void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) { - const uint8_t *ivp; + __m128i tot, ivreg; size_t i; len /= AES_BLOCK_LEN; - ivp = iv; + ivreg = _mm_loadu_si128((const __m128i *)iv); for (i = 0; i < len; i++) { - aesni_enc(rounds - 1, key_schedule, from, to, ivp); - ivp = to; + tot = aesni_enc(rounds - 1, key_schedule, + _mm_loadu_si128((const __m128i *)from) ^ ivreg); + ivreg = tot; + _mm_storeu_si128((__m128i *)to, tot); from += AES_BLOCK_LEN; to += AES_BLOCK_LEN; } } void -aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, - const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) +aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, + uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]) { - size_t i; + __m128i blocks[8]; + __m128i *bufs; + __m128i ivreg, nextiv; + size_t i, j, cnt; + + ivreg = _mm_loadu_si128((const __m128i *)iv); + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + bufs = (__m128i *)buf; + aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1], + bufs[2], bufs[3], bufs[4], bufs[5], bufs[6], + bufs[7], &blocks[0]); + for (j = 0; j < 8; j++) { + nextiv = bufs[j]; + bufs[j] = blocks[j] ^ ivreg; + ivreg = nextiv; + } + buf += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + bufs = (__m128i *)buf; + nextiv = bufs[0]; + bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg; + ivreg = nextiv; + buf += AES_BLOCK_LEN; + } +} - len /= AES_BLOCK_LEN; - for (i = 0; i < len; i++) { - aesni_enc(rounds - 1, key_schedule, from, to, NULL); +void +aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to) +{ + __m128i tot; + const __m128i *blocks; + size_t i, cnt; + + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + blocks = (const __m128i *)from; + aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1], + blocks[2], blocks[3], blocks[4], blocks[5], blocks[6], + blocks[7], (__m128i *)to); + from += AES_BLOCK_LEN * 8; + to += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + tot = aesni_enc(rounds - 1, key_schedule, + _mm_loadu_si128((const __m128i *)from)); + _mm_storeu_si128((__m128i *)to, tot); from += AES_BLOCK_LEN; to += AES_BLOCK_LEN; } @@ -73,11 +126,25 @@ void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) { - size_t i; - - len /= AES_BLOCK_LEN; - for (i = 0; i < len; i++) { - aesni_dec(rounds - 1, key_schedule, from, to, NULL); + __m128i tot; + const __m128i *blocks; + size_t i, cnt; + + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + blocks = (const __m128i *)from; + aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1], + blocks[2], blocks[3], blocks[4], blocks[5], blocks[6], + blocks[7], (__m128i *)to); + from += AES_BLOCK_LEN * 8; + to += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + tot = aesni_dec(rounds - 1, key_schedule, + _mm_loadu_si128((const __m128i *)from)); + _mm_storeu_si128((__m128i *)to, tot); from += AES_BLOCK_LEN; to += AES_BLOCK_LEN; } @@ -87,34 +154,88 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ +static inline __m128i +xts_crank_lfsr(__m128i inp) +{ + const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA); + __m128i xtweak, ret; + + /* set up xor mask */ + xtweak = _mm_shuffle_epi32(inp, 0x93); + xtweak = _mm_srai_epi32(xtweak, 31); + xtweak &= alphamask; + + /* next term */ + ret = _mm_slli_epi32(inp, 1); + ret ^= xtweak; + + return ret; +} + static void -aesni_crypt_xts_block(int rounds, const void *key_schedule, uint64_t *tweak, - const uint64_t *from, uint64_t *to, uint64_t *block, int do_encrypt) +aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak, + const __m128i *from, __m128i *to, int do_encrypt) { - int carry; + __m128i block; - block[0] = from[0] ^ tweak[0]; - block[1] = from[1] ^ tweak[1]; + block = *from ^ *tweak; if (do_encrypt) - aesni_enc(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL); + block = aesni_enc(rounds - 1, key_schedule, block); else - aesni_dec(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL); + block = aesni_dec(rounds - 1, key_schedule, block); - to[0] ^= tweak[0]; - to[1] ^= tweak[1]; + *to = block ^ *tweak; - /* Exponentiate tweak. */ - carry = ((tweak[0] & 0x8000000000000000ULL) > 0); - tweak[0] <<= 1; - if (tweak[1] & 0x8000000000000000ULL) { - uint8_t *twk = (uint8_t *)tweak; + *tweak = xts_crank_lfsr(*tweak); +} - twk[0] ^= AES_XTS_ALPHA; - } - tweak[1] <<= 1; - if (carry) - tweak[1] |= 1; +static void +aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak, + const __m128i *from, __m128i *to, int do_encrypt) +{ + __m128i tmptweak; + __m128i a, b, c, d, e, f, g, h; + __m128i tweaks[8]; + __m128i tmp[8]; + + tmptweak = *tweak; + + /* + * unroll the loop. This lets gcc put values directly in the + * register and saves memory accesses. + */ +#define PREPINP(v, pos) \ + do { \ + tweaks[(pos)] = tmptweak; \ + (v) = from[(pos)] ^ tmptweak; \ + tmptweak = xts_crank_lfsr(tmptweak); \ + } while (0) + PREPINP(a, 0); + PREPINP(b, 1); + PREPINP(c, 2); + PREPINP(d, 3); + PREPINP(e, 4); + PREPINP(f, 5); + PREPINP(g, 6); + PREPINP(h, 7); + *tweak = tmptweak; + + if (do_encrypt) + aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, + tmp); + else + aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, + tmp); + + to[0] = tmp[0] ^ tweaks[0]; + to[1] = tmp[1] ^ tweaks[1]; + to[2] = tmp[2] ^ tweaks[2]; + to[3] = tmp[3] ^ tweaks[3]; + to[4] = tmp[4] ^ tweaks[4]; + to[5] = tmp[5] ^ tweaks[5]; + to[6] = tmp[6] ^ tweaks[6]; + to[7] = tmp[7] ^ tweaks[7]; } static void @@ -122,9 +243,9 @@ aesni_crypt_xts(int rounds, const void *data_schedule, const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt) { - uint64_t block[AES_XTS_BLOCKSIZE / 8]; - uint8_t tweak[AES_XTS_BLOCKSIZE]; - size_t i; + __m128i tweakreg; + uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16); + size_t i, cnt; /* * Prepare tweak as E_k2(IV). IV is specified as LE representation @@ -137,21 +258,27 @@ aesni_crypt_xts(int rounds, const void *data_schedule, #else #error Only LITTLE_ENDIAN architectures are supported. #endif - aesni_enc(rounds - 1, tweak_schedule, tweak, tweak, NULL); - - len /= AES_XTS_BLOCKSIZE; - for (i = 0; i < len; i++) { - aesni_crypt_xts_block(rounds, data_schedule, (uint64_t *)tweak, - (const uint64_t *)from, (uint64_t *)to, block, do_encrypt); + tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]); + tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg); + + cnt = len / AES_XTS_BLOCKSIZE / 8; + for (i = 0; i < cnt; i++) { + aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg, + (const __m128i *)from, (__m128i *)to, do_encrypt); + from += AES_XTS_BLOCKSIZE * 8; + to += AES_XTS_BLOCKSIZE * 8; + } + i *= 8; + cnt = len / AES_XTS_BLOCKSIZE; + for (; i < cnt; i++) { + aesni_crypt_xts_block(rounds, data_schedule, &tweakreg, + (const __m128i *)from, (__m128i *)to, do_encrypt); from += AES_XTS_BLOCKSIZE; to += AES_XTS_BLOCKSIZE; } - - bzero(tweak, sizeof(tweak)); - bzero(block, sizeof(block)); } -static void +void aesni_encrypt_xts(int rounds, const void *data_schedule, const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) @@ -161,7 +288,7 @@ aesni_encrypt_xts(int rounds, const void *data_schedule, iv, 1); } -static void +void aesni_decrypt_xts(int rounds, const void *data_schedule, const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) |