aboutsummaryrefslogtreecommitdiff
path: root/sys/crypto/aesni
diff options
context:
space:
mode:
Diffstat (limited to 'sys/crypto/aesni')
-rw-r--r-- sys/crypto/aesni/aesencdec.h 136
-rw-r--r-- sys/crypto/aesni/aesencdec_amd64.S 135
-rw-r--r-- sys/crypto/aesni/aesencdec_i386.S 166
-rw-r--r-- sys/crypto/aesni/aeskeys_amd64.S 96
-rw-r--r-- sys/crypto/aesni/aesni.c 8
-rw-r--r-- sys/crypto/aesni/aesni.h 15
-rw-r--r-- sys/crypto/aesni/aesni_wrap.c 225
7 files changed, 359 insertions, 422 deletions
diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h
new file mode 100644
index 000000000000..0c9bf5f21628
--- /dev/null
+++ b/sys/crypto/aesni/aesencdec.h
@@ -0,0 +1,136 @@
+/*-
+ * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <wmmintrin.h>
+
+static inline void
+aesni_enc8(int rounds, const uint8_t *key_schedule, __m128i a,
+ __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
+ __m128i h, __m128i out[8])
+{ /* Encrypt the eight blocks a..h with one key schedule; results are written to out[0..7]. */
+ const __m128i *keysched = (const __m128i *)key_schedule; /* round keys viewed as 16-byte vectors; dereferenced directly, so presumably must be 16-byte aligned */
+ int i;
+
+ a ^= keysched[0]; /* initial whitening: XOR every block with round key 0 */
+ b ^= keysched[0];
+ c ^= keysched[0];
+ d ^= keysched[0];
+ e ^= keysched[0];
+ f ^= keysched[0];
+ g ^= keysched[0];
+ h ^= keysched[0];
+
+ for (i = 0; i < rounds; i++) { /* 'rounds' full rounds with keys 1..rounds, applied to all eight blocks per key */
+ a = _mm_aesenc_si128(a, keysched[i + 1]);
+ b = _mm_aesenc_si128(b, keysched[i + 1]);
+ c = _mm_aesenc_si128(c, keysched[i + 1]);
+ d = _mm_aesenc_si128(d, keysched[i + 1]);
+ e = _mm_aesenc_si128(e, keysched[i + 1]);
+ f = _mm_aesenc_si128(f, keysched[i + 1]);
+ g = _mm_aesenc_si128(g, keysched[i + 1]);
+ h = _mm_aesenc_si128(h, keysched[i + 1]);
+ }
+
+ out[0] = _mm_aesenclast_si128(a, keysched[i + 1]); /* i == rounds after the loop, so the final round uses key rounds + 1 */
+ out[1] = _mm_aesenclast_si128(b, keysched[i + 1]);
+ out[2] = _mm_aesenclast_si128(c, keysched[i + 1]);
+ out[3] = _mm_aesenclast_si128(d, keysched[i + 1]);
+ out[4] = _mm_aesenclast_si128(e, keysched[i + 1]);
+ out[5] = _mm_aesenclast_si128(f, keysched[i + 1]);
+ out[6] = _mm_aesenclast_si128(g, keysched[i + 1]);
+ out[7] = _mm_aesenclast_si128(h, keysched[i + 1]);
+}
+
+static inline void
+aesni_dec8(int rounds, const uint8_t *key_schedule, __m128i a,
+ __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
+ __m128i h, __m128i out[8])
+{ /* Decrypt the eight blocks a..h with one key schedule; results are written to out[0..7]. */
+ const __m128i *keysched = (const __m128i *)key_schedule; /* round keys viewed as 16-byte vectors; presumably the decryption schedule built by aesni_set_deckey -- confirm with caller */
+ int i;
+
+ a ^= keysched[0]; /* initial whitening: XOR every block with round key 0 */
+ b ^= keysched[0];
+ c ^= keysched[0];
+ d ^= keysched[0];
+ e ^= keysched[0];
+ f ^= keysched[0];
+ g ^= keysched[0];
+ h ^= keysched[0];
+
+ for (i = 0; i < rounds; i++) { /* 'rounds' full inverse rounds with keys 1..rounds, applied to all eight blocks per key */
+ a = _mm_aesdec_si128(a, keysched[i + 1]);
+ b = _mm_aesdec_si128(b, keysched[i + 1]);
+ c = _mm_aesdec_si128(c, keysched[i + 1]);
+ d = _mm_aesdec_si128(d, keysched[i + 1]);
+ e = _mm_aesdec_si128(e, keysched[i + 1]);
+ f = _mm_aesdec_si128(f, keysched[i + 1]);
+ g = _mm_aesdec_si128(g, keysched[i + 1]);
+ h = _mm_aesdec_si128(h, keysched[i + 1]);
+ }
+
+ out[0] = _mm_aesdeclast_si128(a, keysched[i + 1]); /* i == rounds after the loop, so the final round uses key rounds + 1 */
+ out[1] = _mm_aesdeclast_si128(b, keysched[i + 1]);
+ out[2] = _mm_aesdeclast_si128(c, keysched[i + 1]);
+ out[3] = _mm_aesdeclast_si128(d, keysched[i + 1]);
+ out[4] = _mm_aesdeclast_si128(e, keysched[i + 1]);
+ out[5] = _mm_aesdeclast_si128(f, keysched[i + 1]);
+ out[6] = _mm_aesdeclast_si128(g, keysched[i + 1]);
+ out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]);
+}
+
+static inline __m128i
+aesni_enc(int rounds, const uint8_t *key_schedule, const __m128i from)
+{ /* Encrypt the single block 'from' and return the result. */
+ __m128i tmp;
+ const __m128i *keysched = (const __m128i *)key_schedule; /* round keys viewed as 16-byte vectors */
+ int i;
+
+ tmp = from ^ keysched[0]; /* initial whitening with round key 0 */
+
+ for (i = 0; i < rounds; i++) /* 'rounds' full rounds with keys 1..rounds */
+ tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);
+
+ return _mm_aesenclast_si128(tmp, keysched[i + 1]); /* i == rounds here: final round uses key rounds + 1 */
+}
+
+static inline __m128i
+aesni_dec(int rounds, const uint8_t *key_schedule, const __m128i from)
+{ /* Decrypt the single block 'from' and return the result. */
+ __m128i tmp;
+ const __m128i *keysched = (const __m128i *)key_schedule; /* round keys viewed as 16-byte vectors; presumably the decryption schedule -- confirm with caller */
+ int i;
+
+ tmp = from ^ keysched[0]; /* initial whitening with round key 0 */
+
+ for (i = 0; i < rounds; i++) /* 'rounds' full inverse rounds with keys 1..rounds */
+ tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);
+
+ return _mm_aesdeclast_si128(tmp, keysched[i + 1]); /* i == rounds here: final round uses key rounds + 1 */
+}
diff --git a/sys/crypto/aesni/aesencdec_amd64.S b/sys/crypto/aesni/aesencdec_amd64.S
deleted file mode 100644
index f77918b8e9ad..000000000000
--- a/sys/crypto/aesni/aesencdec_amd64.S
+++ /dev/null
@@ -1,135 +0,0 @@
-/*-
- * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <machine/asmacros.h>
-
- .text
-
-ENTRY(aesni_enc)
- .cfi_startproc
- movdqu (%rdx),%xmm0
- cmpq $0,%r8
- je 1f
- movdqu (%r8),%xmm1 /* unaligned load into reg */
- pxor %xmm1,%xmm0 /* pxor otherwise can fault on iv */
-1:
- pxor (%rsi),%xmm0
-2:
- addq $0x10,%rsi
-// aesenc (%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdc,0x06
- decl %edi
- jne 2b
- addq $0x10,%rsi
-// aesenclast (%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdd,0x06
- movdqu %xmm0,(%rcx)
- retq
- .cfi_endproc
-END(aesni_enc)
-
-ENTRY(aesni_dec)
- .cfi_startproc
- movdqu (%rdx),%xmm0
- pxor (%rsi),%xmm0
-1:
- addq $0x10,%rsi
-// aesdec (%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x06
- decl %edi
- jne 1b
- addq $0x10,%rsi
-// aesdeclast (%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x06
- cmpq $0,%r8
- je 2f
- movdqu (%r8),%xmm1
- pxor %xmm1,%xmm0
-2:
- movdqu %xmm0,(%rcx)
- retq
- .cfi_endproc
-END(aesni_dec)
-
-ENTRY(aesni_decrypt_cbc)
- .cfi_startproc
- shrq $4,%rdx
- movdqu (%r8),%xmm1
-1:
- movdqu (%rcx),%xmm0
- movdqa %xmm0,%xmm2
- pxor (%rsi),%xmm0
- cmpl $12,%edi
-// aesdec 0x10(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x10
-// aesdec 0x20(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x20
-// aesdec 0x30(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x30
-// aesdec 0x40(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x40
-// aesdec 0x50(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x50
-// aesdec 0x60(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x60
-// aesdec 0x70(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x46,0x70
-// aesdec 0x80(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0x80,0x00,0x00,0x00
-// aesdec 0x90(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0x90,0x00,0x00,0x00
- jge 2f
-// aesdeclast 0xa0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x86,0xa0,0x00,0x00,0x00
- jmp 4f
-2:
-// aesdec 0xa0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0xa0,0x00,0x00,0x00
-// aesdec 0xb0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0xb0,0x00,0x00,0x00
- jg 3f
-// aesdeclast 0xc0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x86,0xc0,0x00,0x00,0x00
- jmp 4f
-3:
-// aesdec 0xc0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0xc0,0x00,0x00,0x00
-// aesdec 0xd0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x86,0xd0,0x00,0x00,0x00
-// aesdeclast 0xe0(%rsi),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x86,0xe0,0x00,0x00,0x00
-4:
- pxor %xmm1,%xmm0
- movdqu %xmm0,(%rcx)
- movdqa %xmm2,%xmm1 // iv
- addq $0x10,%rcx
- decq %rdx
- jne 1b
- retq
- .cfi_endproc
-END(aesni_decrypt_cbc)
-
- .ident "$FreeBSD$"
diff --git a/sys/crypto/aesni/aesencdec_i386.S b/sys/crypto/aesni/aesencdec_i386.S
deleted file mode 100644
index 78de311f23c9..000000000000
--- a/sys/crypto/aesni/aesencdec_i386.S
+++ /dev/null
@@ -1,166 +0,0 @@
-/*-
- * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <machine/asmacros.h>
-
-ENTRY(aesni_enc)
- .cfi_startproc
- pushl %ebp
- .cfi_adjust_cfa_offset 4
- movl %esp,%ebp
- movl 8(%ebp),%ecx /* rounds */
- movl 16(%ebp),%edx
- movdqu (%edx),%xmm0 /* from */
- movl 24(%ebp),%eax /* iv */
- cmpl $0,%eax
- je 1f
- movdqu (%eax),%xmm1
- pxor %xmm1,%xmm0
-1:
- movl 12(%ebp),%eax /* key */
- pxor (%eax),%xmm0
-2:
- addl $0x10,%eax
-// aesenc (%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdc,0x00
- loopne 2b
- addl $0x10,%eax
-// aesenclast (%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdd,0x00
- movl 20(%ebp),%eax
- movdqu %xmm0,(%eax) /* to */
- leave
- .cfi_adjust_cfa_offset -4
- retl
- .cfi_endproc
-END(aesni_enc)
-
-ENTRY(aesni_dec)
- .cfi_startproc
- pushl %ebp
- .cfi_adjust_cfa_offset 4
- movl %esp,%ebp
- movl 8(%ebp),%ecx /* rounds */
- movl 16(%ebp),%edx
- movdqu (%edx),%xmm0 /* from */
- movl 12(%ebp),%eax /* key */
- pxor (%eax),%xmm0
-1:
- addl $0x10,%eax
-// aesdec (%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x00
- loopne 1b
- addl $0x10,%eax
-// aesdeclast (%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x00
- movl 24(%ebp),%eax
- cmpl $0,%eax /* iv */
- je 2f
- movdqu (%eax),%xmm1
- pxor %xmm1,%xmm0
-2:
- movl 20(%ebp),%eax
- movdqu %xmm0,(%eax) /* to */
- leave
- .cfi_adjust_cfa_offset -4
- retl
- .cfi_endproc
-END(aesni_dec)
-
-ENTRY(aesni_decrypt_cbc)
- .cfi_startproc
- pushl %ebp
- .cfi_adjust_cfa_offset 4
- movl %esp,%ebp
- pushl %ebx
- pushl %esi
- movl 12(%ebp),%eax /* key */
- movl 16(%ebp),%ecx /* length */
- shrl $4,%ecx
- movl 20(%ebp),%ebx /* buf */
- movl 24(%ebp),%esi
- movdqu (%esi),%xmm1 /* iv */
- movl 8(%ebp),%esi /* rounds */
-1:
- movdqu (%ebx),%xmm0
- movdqa %xmm0,%xmm2
- pxor (%eax),%xmm0
- cmpl $12,%esi
-// aesdec 0x10(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x10
-// aesdec 0x20(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x20
-// aesdec 0x30(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x30
-// aesdec 0x40(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x40
-// aesdec 0x50(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x50
-// aesdec 0x60(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x60
-// aesdec 0x70(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x40,0x70
-// aesdec 0x80(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0x80,0x00,0x00,0x00
-// aesdec 0x90(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0x90,0x00,0x00,0x00
- jge 2f
-// aesdeclast 0xa0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x80,0xa0,0x00,0x00,0x00
- jmp 4f
-2:
-// aesdec 0xa0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0xa0,0x00,0x00,0x00
-// aesdec 0xb0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0xb0,0x00,0x00,0x00
- jg 3f
-// aesdeclast 0xc0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x80,0xc0,0x00,0x00,0x00
- jmp 4f
-3:
-// aesdec 0xc0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0xc0,0x00,0x00,0x00
-// aesdec 0xd0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xde,0x80,0xd0,0x00,0x00,0x00
-// aesdeclast 0xe0(%eax),%xmm0
- .byte 0x66,0x0f,0x38,0xdf,0x80,0xe0,0x00,0x00,0x00
-4:
- pxor %xmm1,%xmm0
- movdqu %xmm0,(%ebx)
- movdqa %xmm2,%xmm1
- addl $0x10,%ebx
- decl %ecx
- jne 1b
-
- popl %esi
- popl %ebx
- leave
- .cfi_adjust_cfa_offset -4
- retl
- .cfi_endproc
-END(aesni_decrypt_cbc)
-
- .ident "$FreeBSD$"
diff --git a/sys/crypto/aesni/aeskeys_amd64.S b/sys/crypto/aesni/aeskeys_amd64.S
index 23a4d3dc1fba..9b3e98c8437e 100644
--- a/sys/crypto/aesni/aeskeys_amd64.S
+++ b/sys/crypto/aesni/aeskeys_amd64.S
@@ -125,103 +125,72 @@ ENTRY(aesni_set_enckey)
movups 0x10(%rdi),%xmm2 # other user key
movaps %xmm2,(%rsi)
addq $0x10,%rsi
-// aeskeygenassist $0x1,%xmm2,%xmm1 # round 1
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x01
+ aeskeygenassist $0x1,%xmm2,%xmm1 # round 1
call _key_expansion_256a
-// aeskeygenassist $0x1,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x01
+ aeskeygenassist $0x1,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x2,%xmm2,%xmm1 # round 2
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x02
+ aeskeygenassist $0x2,%xmm2,%xmm1 # round 2
call _key_expansion_256a
-// aeskeygenassist $0x2,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x02
+ aeskeygenassist $0x2,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x4,%xmm2,%xmm1 # round 3
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x04
+ aeskeygenassist $0x4,%xmm2,%xmm1 # round 3
call _key_expansion_256a
-// aeskeygenassist $0x4,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x04
+ aeskeygenassist $0x4,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x8,%xmm2,%xmm1 # round 4
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x08
+ aeskeygenassist $0x8,%xmm2,%xmm1 # round 4
call _key_expansion_256a
-// aeskeygenassist $0x8,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x08
+ aeskeygenassist $0x8,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x10,%xmm2,%xmm1 # round 5
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x10
+ aeskeygenassist $0x10,%xmm2,%xmm1 # round 5
call _key_expansion_256a
-// aeskeygenassist $0x10,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x10
+ aeskeygenassist $0x10,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x20,%xmm2,%xmm1 # round 6
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x20
+ aeskeygenassist $0x20,%xmm2,%xmm1 # round 6
call _key_expansion_256a
-// aeskeygenassist $0x20,%xmm0,%xmm1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x20
+ aeskeygenassist $0x20,%xmm0,%xmm1
call _key_expansion_256b
-// aeskeygenassist $0x40,%xmm2,%xmm1 # round 7
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x40
+ aeskeygenassist $0x40,%xmm2,%xmm1 # round 7
call _key_expansion_256a
retq
.Lenc_key192:
movq 0x10(%rdi),%xmm2 # other user key
-// aeskeygenassist $0x1,%xmm2,%xmm1 # round 1
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x01
+ aeskeygenassist $0x1,%xmm2,%xmm1 # round 1
call _key_expansion_192a
-// aeskeygenassist $0x2,%xmm2,%xmm1 # round 2
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x02
+ aeskeygenassist $0x2,%xmm2,%xmm1 # round 2
call _key_expansion_192b
-// aeskeygenassist $0x4,%xmm2,%xmm1 # round 3
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x04
+ aeskeygenassist $0x4,%xmm2,%xmm1 # round 3
call _key_expansion_192a
-// aeskeygenassist $0x8,%xmm2,%xmm1 # round 4
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x08
+ aeskeygenassist $0x8,%xmm2,%xmm1 # round 4
call _key_expansion_192b
-// aeskeygenassist $0x10,%xmm2,%xmm1 # round 5
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x10
+ aeskeygenassist $0x10,%xmm2,%xmm1 # round 5
call _key_expansion_192a
-// aeskeygenassist $0x20,%xmm2,%xmm1 # round 6
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x20
+ aeskeygenassist $0x20,%xmm2,%xmm1 # round 6
call _key_expansion_192b
-// aeskeygenassist $0x40,%xmm2,%xmm1 # round 7
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x40
+ aeskeygenassist $0x40,%xmm2,%xmm1 # round 7
call _key_expansion_192a
-// aeskeygenassist $0x80,%xmm2,%xmm1 # round 8
- .byte 0x66,0x0f,0x3a,0xdf,0xca,0x80
+ aeskeygenassist $0x80,%xmm2,%xmm1 # round 8
call _key_expansion_192b
retq
.Lenc_key128:
-// aeskeygenassist $0x1,%xmm0,%xmm1 # round 1
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x01
+ aeskeygenassist $0x1,%xmm0,%xmm1 # round 1
call _key_expansion_128
-// aeskeygenassist $0x2,%xmm0,%xmm1 # round 2
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x02
+ aeskeygenassist $0x2,%xmm0,%xmm1 # round 2
call _key_expansion_128
-// aeskeygenassist $0x4,%xmm0,%xmm1 # round 3
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x04
+ aeskeygenassist $0x4,%xmm0,%xmm1 # round 3
call _key_expansion_128
-// aeskeygenassist $0x8,%xmm0,%xmm1 # round 4
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x08
+ aeskeygenassist $0x8,%xmm0,%xmm1 # round 4
call _key_expansion_128
-// aeskeygenassist $0x10,%xmm0,%xmm1 # round 5
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x10
+ aeskeygenassist $0x10,%xmm0,%xmm1 # round 5
call _key_expansion_128
-// aeskeygenassist $0x20,%xmm0,%xmm1 # round 6
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x20
+ aeskeygenassist $0x20,%xmm0,%xmm1 # round 6
call _key_expansion_128
-// aeskeygenassist $0x40,%xmm0,%xmm1 # round 7
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x40
+ aeskeygenassist $0x40,%xmm0,%xmm1 # round 7
call _key_expansion_128
-// aeskeygenassist $0x80,%xmm0,%xmm1 # round 8
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x80
+ aeskeygenassist $0x80,%xmm0,%xmm1 # round 8
call _key_expansion_128
-// aeskeygenassist $0x1b,%xmm0,%xmm1 # round 9
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x1b
+ aeskeygenassist $0x1b,%xmm0,%xmm1 # round 9
call _key_expansion_128
-// aeskeygenassist $0x36,%xmm0,%xmm1 # round 10
- .byte 0x66,0x0f,0x3a,0xdf,0xc8,0x36
+ aeskeygenassist $0x36,%xmm0,%xmm1 # round 10
call _key_expansion_128
retq
.cfi_endproc
@@ -238,8 +207,7 @@ ENTRY(aesni_set_deckey)
1:
addq $0x10,%rsi
subq $0x10,%rdi
-// aesimc (%rdi),%xmm1
- .byte 0x66,0x0f,0x38,0xdb,0x0f
+ aesimc (%rdi),%xmm1
movdqa %xmm1,(%rsi)
decl %edx
jne 1b
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
index ca00a578cad5..73eb28a16e36 100644
--- a/sys/crypto/aesni/aesni.c
+++ b/sys/crypto/aesni/aesni.c
@@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/uio.h>
#include <crypto/aesni/aesni.h>
-#include "cryptodev_if.h"
+#include <cryptodev_if.h>
struct aesni_softc {
int32_t cid;
@@ -74,6 +74,12 @@ aesni_probe(device_t dev)
device_printf(dev, "No AESNI support.\n");
return (EINVAL);
}
+
+ if ((cpu_feature & CPUID_SSE2) == 0) {
+ device_printf(dev, "No SSE2 support but AESNI!?!\n");
+ return (EINVAL);
+ }
+
device_set_desc_copy(dev, "AES-CBC,AES-XTS");
return (0);
}
diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h
index 78255b779a38..17ca9c5bf7f4 100644
--- a/sys/crypto/aesni/aesni.h
+++ b/sys/crypto/aesni/aesni.h
@@ -71,12 +71,6 @@ struct aesni_session {
/*
* Internal functions, implemented in assembler.
*/
-void aesni_enc(int rounds, const uint8_t *key_schedule,
- const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN],
- const uint8_t iv[AES_BLOCK_LEN]);
-void aesni_dec(int rounds, const uint8_t *key_schedule,
- const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN],
- const uint8_t iv[AES_BLOCK_LEN]);
void aesni_set_enckey(const uint8_t *userkey, uint8_t *encrypt_schedule,
int number_of_rounds);
void aesni_set_deckey(const uint8_t *encrypt_schedule,
@@ -88,12 +82,19 @@ void aesni_set_deckey(const uint8_t *encrypt_schedule,
void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
void aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
- const uint8_t *from, const uint8_t iv[AES_BLOCK_LEN]);
+ uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]);
void aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
+void aesni_encrypt_xts(int rounds, const void *data_schedule,
+ const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
+ const uint8_t iv[AES_BLOCK_LEN]);
+void aesni_decrypt_xts(int rounds, const void *data_schedule,
+ const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
+ const uint8_t iv[AES_BLOCK_LEN]);
+
int aesni_cipher_setup(struct aesni_session *ses,
struct cryptoini *encini);
int aesni_cipher_process(struct aesni_session *ses,
diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c
index 3340b1fcb6c0..197baf7f0778 100644
--- a/sys/crypto/aesni/aesni_wrap.c
+++ b/sys/crypto/aesni/aesni_wrap.c
@@ -2,6 +2,7 @@
* Copyright (C) 2008 Damien Miller <djm@mindrot.org>
* Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
+ * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,13 +29,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-
+
#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>
+
+#include "aesencdec.h"
MALLOC_DECLARE(M_AESNI);
@@ -42,28 +45,78 @@ void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
- const uint8_t *ivp;
+ __m128i tot, ivreg;
size_t i;
len /= AES_BLOCK_LEN;
- ivp = iv;
+ ivreg = _mm_loadu_si128((const __m128i *)iv);
for (i = 0; i < len; i++) {
- aesni_enc(rounds - 1, key_schedule, from, to, ivp);
- ivp = to;
+ tot = aesni_enc(rounds - 1, key_schedule,
+ _mm_loadu_si128((const __m128i *)from) ^ ivreg);
+ ivreg = tot;
+ _mm_storeu_si128((__m128i *)to, tot);
from += AES_BLOCK_LEN;
to += AES_BLOCK_LEN;
}
}
void
-aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
- const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
+aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
+    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
+{
+	/*
+	 * Decrypt AES-CBC ciphertext in place.
+	 *
+	 * rounds	total number of AES rounds; rounds - 1 is handed to
+	 *		the aesni_dec/aesni_dec8 helpers, which add the
+	 *		final round themselves.
+	 * key_schedule	round keys consumed by the aesdec helpers
+	 *		(presumably the schedule from aesni_set_deckey).
+	 * len		byte count; only whole AES_BLOCK_LEN blocks are
+	 *		processed, a trailing partial block is left untouched.
+	 * buf		ciphertext on entry, plaintext on return.  No
+	 *		alignment is assumed.
+	 * iv		initial chaining value; read only, never updated.
+	 *
+	 * Every access to buf and iv goes through the unaligned load/store
+	 * intrinsics.  Dereferencing buf as an __m128i pointer would let the
+	 * compiler emit aligned moves, which fault when the caller's buffer
+	 * is not 16-byte aligned.
+	 */
+	__m128i in[8];		/* ciphertext blocks of the current batch */
+	__m128i blocks[8];	/* raw block-decrypted output, before IV xor */
+	__m128i ivreg, nextiv;
+	size_t i, j, cnt;
+
+	ivreg = _mm_loadu_si128((const __m128i *)iv);
+
+	/* Bulk path: eight independent block decryptions per pass. */
+	cnt = len / AES_BLOCK_LEN / 8;
+	for (i = 0; i < cnt; i++) {
+		for (j = 0; j < 8; j++)
+			in[j] = _mm_loadu_si128(
+			    (const __m128i *)(buf + j * AES_BLOCK_LEN));
+		aesni_dec8(rounds - 1, key_schedule, in[0], in[1],
+		    in[2], in[3], in[4], in[5], in[6],
+		    in[7], &blocks[0]);
+		for (j = 0; j < 8; j++) {
+			/* P[j] = D(C[j]) ^ C[j-1]; C[j] chains forward. */
+			_mm_storeu_si128(
+			    (__m128i *)(buf + j * AES_BLOCK_LEN),
+			    blocks[j] ^ ivreg);
+			ivreg = in[j];
+		}
+		buf += AES_BLOCK_LEN * 8;
+	}
+
+	/* Tail: the remaining zero to seven blocks, one at a time. */
+	i *= 8;
+	cnt = len / AES_BLOCK_LEN;
+	for (; i < cnt; i++) {
+		nextiv = _mm_loadu_si128((const __m128i *)buf);
+		_mm_storeu_si128((__m128i *)buf,
+		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
+		ivreg = nextiv;
+		buf += AES_BLOCK_LEN;
+	}
+}
- len /= AES_BLOCK_LEN;
- for (i = 0; i < len; i++) {
- aesni_enc(rounds - 1, key_schedule, from, to, NULL);
+void
+aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
+ const uint8_t *from, uint8_t *to)
+{
+ __m128i tot;
+ const __m128i *blocks;
+ size_t i, cnt;
+
+ cnt = len / AES_BLOCK_LEN / 8;
+ for (i = 0; i < cnt; i++) {
+ blocks = (const __m128i *)from;
+ aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
+ blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
+ blocks[7], (__m128i *)to);
+ from += AES_BLOCK_LEN * 8;
+ to += AES_BLOCK_LEN * 8;
+ }
+ i *= 8;
+ cnt = len / AES_BLOCK_LEN;
+ for (; i < cnt; i++) {
+ tot = aesni_enc(rounds - 1, key_schedule,
+ _mm_loadu_si128((const __m128i *)from));
+ _mm_storeu_si128((__m128i *)to, tot);
from += AES_BLOCK_LEN;
to += AES_BLOCK_LEN;
}
@@ -73,11 +126,25 @@ void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
- size_t i;
-
- len /= AES_BLOCK_LEN;
- for (i = 0; i < len; i++) {
- aesni_dec(rounds - 1, key_schedule, from, to, NULL);
+ __m128i tot;
+ const __m128i *blocks;
+ size_t i, cnt;
+
+ cnt = len / AES_BLOCK_LEN / 8;
+ for (i = 0; i < cnt; i++) {
+ blocks = (const __m128i *)from;
+ aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
+ blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
+ blocks[7], (__m128i *)to);
+ from += AES_BLOCK_LEN * 8;
+ to += AES_BLOCK_LEN * 8;
+ }
+ i *= 8;
+ cnt = len / AES_BLOCK_LEN;
+ for (; i < cnt; i++) {
+ tot = aesni_dec(rounds - 1, key_schedule,
+ _mm_loadu_si128((const __m128i *)from));
+ _mm_storeu_si128((__m128i *)to, tot);
from += AES_BLOCK_LEN;
to += AES_BLOCK_LEN;
}
@@ -87,34 +154,88 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
#define AES_XTS_IVSIZE 8
#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
+static inline __m128i
+xts_crank_lfsr(__m128i inp)
+{ /* Return the next XTS tweak: inp shifted left one bit as a 128-bit value, with AES_XTS_ALPHA folded in on carry-out. */
+ const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA); /* lane 0: reduction constant; lanes 1-3: single carry-in bit */
+ __m128i xtweak, ret;
+
+ /* set up xor mask */
+ xtweak = _mm_shuffle_epi32(inp, 0x93); /* rotate the 32-bit lanes up by one: each lane sees the lane below, lane 0 sees lane 3 */
+ xtweak = _mm_srai_epi32(xtweak, 31); /* arithmetic shift smears each lane's top bit across the whole lane */
+ xtweak &= alphamask; /* leaves 1 for a carry between lanes, AES_XTS_ALPHA for a carry out of bit 127 */
+
+ /* next term */
+ ret = _mm_slli_epi32(inp, 1); /* shift every 32-bit lane left by one; bits crossing lanes are lost here */
+ ret ^= xtweak; /* ...and restored (or reduced) by the mask built above */
+
+ return ret;
+}
+
static void
-aesni_crypt_xts_block(int rounds, const void *key_schedule, uint64_t *tweak,
- const uint64_t *from, uint64_t *to, uint64_t *block, int do_encrypt)
+aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
+ const __m128i *from, __m128i *to, int do_encrypt)
{
- int carry;
+ __m128i block;
- block[0] = from[0] ^ tweak[0];
- block[1] = from[1] ^ tweak[1];
+ block = *from ^ *tweak;
if (do_encrypt)
- aesni_enc(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
+ block = aesni_enc(rounds - 1, key_schedule, block);
else
- aesni_dec(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
+ block = aesni_dec(rounds - 1, key_schedule, block);
- to[0] ^= tweak[0];
- to[1] ^= tweak[1];
+ *to = block ^ *tweak;
- /* Exponentiate tweak. */
- carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
- tweak[0] <<= 1;
- if (tweak[1] & 0x8000000000000000ULL) {
- uint8_t *twk = (uint8_t *)tweak;
+ *tweak = xts_crank_lfsr(*tweak);
+}
- twk[0] ^= AES_XTS_ALPHA;
- }
- tweak[1] <<= 1;
- if (carry)
- tweak[1] |= 1;
+static void
+aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
+ const __m128i *from, __m128i *to, int do_encrypt)
+{
+ __m128i tmptweak;
+ __m128i a, b, c, d, e, f, g, h;
+ __m128i tweaks[8];
+ __m128i tmp[8];
+
+ tmptweak = *tweak;
+
+ /*
+ * unroll the loop. This lets gcc put values directly in the
+ * register and saves memory accesses.
+ */
+#define PREPINP(v, pos) \
+ do { \
+ tweaks[(pos)] = tmptweak; \
+ (v) = from[(pos)] ^ tmptweak; \
+ tmptweak = xts_crank_lfsr(tmptweak); \
+ } while (0)
+ PREPINP(a, 0);
+ PREPINP(b, 1);
+ PREPINP(c, 2);
+ PREPINP(d, 3);
+ PREPINP(e, 4);
+ PREPINP(f, 5);
+ PREPINP(g, 6);
+ PREPINP(h, 7);
+ *tweak = tmptweak;
+
+ if (do_encrypt)
+ aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
+ tmp);
+ else
+ aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
+ tmp);
+
+ to[0] = tmp[0] ^ tweaks[0];
+ to[1] = tmp[1] ^ tweaks[1];
+ to[2] = tmp[2] ^ tweaks[2];
+ to[3] = tmp[3] ^ tweaks[3];
+ to[4] = tmp[4] ^ tweaks[4];
+ to[5] = tmp[5] ^ tweaks[5];
+ to[6] = tmp[6] ^ tweaks[6];
+ to[7] = tmp[7] ^ tweaks[7];
}
static void
@@ -122,9 +243,9 @@ aesni_crypt_xts(int rounds, const void *data_schedule,
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
- uint64_t block[AES_XTS_BLOCKSIZE / 8];
- uint8_t tweak[AES_XTS_BLOCKSIZE];
- size_t i;
+ __m128i tweakreg;
+ uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
+ size_t i, cnt;
/*
* Prepare tweak as E_k2(IV). IV is specified as LE representation
@@ -137,21 +258,27 @@ aesni_crypt_xts(int rounds, const void *data_schedule,
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
- aesni_enc(rounds - 1, tweak_schedule, tweak, tweak, NULL);
-
- len /= AES_XTS_BLOCKSIZE;
- for (i = 0; i < len; i++) {
- aesni_crypt_xts_block(rounds, data_schedule, (uint64_t *)tweak,
- (const uint64_t *)from, (uint64_t *)to, block, do_encrypt);
+ tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
+ tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
+
+ cnt = len / AES_XTS_BLOCKSIZE / 8;
+ for (i = 0; i < cnt; i++) {
+ aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
+ (const __m128i *)from, (__m128i *)to, do_encrypt);
+ from += AES_XTS_BLOCKSIZE * 8;
+ to += AES_XTS_BLOCKSIZE * 8;
+ }
+ i *= 8;
+ cnt = len / AES_XTS_BLOCKSIZE;
+ for (; i < cnt; i++) {
+ aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
+ (const __m128i *)from, (__m128i *)to, do_encrypt);
from += AES_XTS_BLOCKSIZE;
to += AES_XTS_BLOCKSIZE;
}
-
- bzero(tweak, sizeof(tweak));
- bzero(block, sizeof(block));
}
-static void
+void
aesni_encrypt_xts(int rounds, const void *data_schedule,
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
const uint8_t iv[AES_BLOCK_LEN])
@@ -161,7 +288,7 @@ aesni_encrypt_xts(int rounds, const void *data_schedule,
iv, 1);
}
-static void
+void
aesni_decrypt_xts(int rounds, const void *data_schedule,
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
const uint8_t iv[AES_BLOCK_LEN])