author     Robert Clausecker <fuz@FreeBSD.org>	2023-10-15 19:25:53 +0000
committer  Robert Clausecker <fuz@FreeBSD.org>	2023-12-28 17:02:41 +0000
commit     9a6a587e672baaed3470f6cf4a27a0d1166ca372 (patch)
tree       4c07d9a2a6227a48470c9918dd29e1a79c6308e3
parent     018743523a18d118bc8b28415706e18822f8c4a2 (diff)
lib/libc/amd64/string: add timingsafe_memcmp() assembly implementation
Conceptually very similar to timingsafe_bcmp(), but with comparison logic
inspired by Elijah Stone's fancy memcmp.  A baseline (SSE) implementation
was omitted this time as I was not able to get it to perform adequately.
Best I got was 8% over the scalar version for long inputs, but slower for
short inputs.

Sponsored by:	The FreeBSD Foundation
Approved by:	security (cperciva)
Inspired by:	https://github.com/moon-chilled/fancy-memcmp
Differential Revision:	https://reviews.freebsd.org/D41696

(cherry picked from commit 5048c1b85506c5e0f441ee7dd98dd8d96d0a4a24)
-rw-r--r--	lib/libc/amd64/string/Makefile.inc	4
-rw-r--r--	lib/libc/amd64/string/timingsafe_memcmp.S	145
2 files changed, 147 insertions(+), 2 deletions(-)
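For context, timingsafe_memcmp(3) behaves like memcmp(3) except that its memory access pattern and running time depend only on the length argument, never on the contents of the buffers. A minimal portable C sketch of that contract, illustrative only and deliberately not named like the libc routine, could look like this (it relies on the usual arithmetic right shift of negative ints):

#include <limits.h>
#include <stddef.h>

int
timingsafe_memcmp_sketch(const void *b1, const void *b2, size_t len)
{
	const unsigned char *p1 = b1, *p2 = b2;
	int res = 0, done = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		/* lt/gt are -1 when p1[i] is less/greater than p2[i], else 0 */
		int lt = (p1[i] - p2[i]) >> CHAR_BIT;
		int gt = (p2[i] - p1[i]) >> CHAR_BIT;

		res |= (lt - gt) & ~done;	/* record only the first difference */
		done |= lt | gt;		/* ... without branching on it */
	}

	return (res);
}

Every byte is visited and every mask is computed no matter where (or whether) the buffers differ; the assembly added below follows the same principle, but with 8-byte words and conditional moves in place of per-byte masks.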
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
index fc420de0450e..09bf7c8f251e 100644
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -1,4 +1,3 @@
-
 MDSRCS+= \
 	amd64_archlevel.c \
 	bcmp.S \
@@ -16,4 +15,5 @@ MDSRCS+= \
 	strlen.S \
 	strnlen.c \
 	strspn.S \
-	timingsafe_bcmp.S
+	timingsafe_bcmp.S \
+	timingsafe_memcmp.S
diff --git a/lib/libc/amd64/string/timingsafe_memcmp.S b/lib/libc/amd64/string/timingsafe_memcmp.S
new file mode 100644
index 000000000000..3f1eccdbd640
--- /dev/null
+++ b/lib/libc/amd64/string/timingsafe_memcmp.S
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+/* int timingsafe_memcmp(const void *rdi, const void *rsi, size_t rdx) */
+ENTRY(timingsafe_memcmp)
+ cmp $16, %rdx # at least 17 bytes to process?
+ ja .Lgt16
+
+ cmp $8, %edx # at least 9 bytes to process?
+ ja .L0916
+
+ cmp $4, %edx # at least 5 bytes to process?
+ ja .L0508
+
+ cmp $2, %edx # at least 3 bytes to process?
+ ja .L0304
+
+ test %edx, %edx # buffer empty?
+ jnz .L0102
+
+ xor %eax, %eax # empty buffer always matches
+ ret
+
+.L0102: movzbl -1(%rdi, %rdx, 1), %eax # load 1--2 bytes from first buffer
+ movzbl -1(%rsi, %rdx, 1), %ecx
+ mov (%rdi), %ah # in big endian
+ mov (%rsi), %ch
+ sub %ecx, %eax
+ ret
+
+.L0304: movzwl -2(%rdi, %rdx, 1), %ecx
+ movzwl -2(%rsi, %rdx, 1), %edx
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %esi
+ bswap %ecx # convert to big endian
+ bswap %edx # ditto for edx, (e)ax, and (e)si
+ rol $8, %ax # ROLW is used here so the upper two
+ rol $8, %si # bytes stay clear, allowing us to
+ sub %edx, %ecx # save an SBB compared to .L0508
+ sbb %esi, %eax
+ or %eax, %ecx # nonzero if not equal
+ setnz %al
+ ret
+
+.L0508: mov -4(%rdi, %rdx, 1), %ecx
+ mov -4(%rsi, %rdx, 1), %edx
+ mov (%rdi), %edi
+ mov (%rsi), %esi
+ bswap %ecx # compare in big endian
+ bswap %edx
+ bswap %edi
+ bswap %esi
+ sub %edx, %ecx
+ sbb %esi, %edi
+ sbb %eax, %eax # -1 if less, 0 if greater or equal
+ or %edi, %ecx # nonzero if not equal
+ setnz %al # negative if <, 0 if =, 1 if >
+ ret
+
+.L0916: mov -8(%rdi, %rdx, 1), %rcx
+ mov -8(%rsi, %rdx, 1), %rdx
+ mov (%rdi), %rdi
+ mov (%rsi), %rsi
+ bswap %rcx # compare in big endian
+ bswap %rdx
+ bswap %rdi
+ bswap %rsi
+ sub %rdx, %rcx
+ sbb %rsi, %rdi
+ sbb %eax, %eax # -1 if less, 0 if greater or equal
+ or %rdi, %rcx # nonzero if not equal
+ setnz %al # negative if <, 0 if =, 1 if >
+ ret
+
+ /* compare 17+ bytes */
+.Lgt16: mov (%rdi), %r8 # process first 16 bytes
+ mov (%rsi), %r9
+ mov $32, %ecx # end offset of the next 16 bytes to process
+ cmp %r8, %r9 # mismatch in head?
+ cmove 8(%rdi), %r8 # if not, try second pair
+ cmove 8(%rsi), %r9
+ cmp %rdx, %rcx # input no longer than 32 bytes?
+ jae .Ltail
+
+ /* main loop processing 16 bytes per iteration */
+ ALIGN_TEXT
+0: mov -16(%rdi, %rcx, 1), %r10
+ mov -16(%rsi, %rcx, 1), %r11
+ cmp %r10, %r11 # mismatch in first pair?
+ cmove -8(%rdi, %rcx, 1), %r10 # if not, try second pair
+ cmove -8(%rsi, %rcx, 1), %r11
+ cmp %r8, %r9 # was there a mismatch previously?
+ cmove %r10, %r8 # apply new pair if there was not
+ cmove %r11, %r9
+ add $16, %rcx
+ cmp %rdx, %rcx
+ jb 0b
+
+.Ltail: mov -8(%rdi, %rdx, 1), %r10 # load last 8 bytes of each buffer
+ mov -8(%rsi, %rdx, 1), %r11
+ cmp %r8, %r9 # mismatch in the part processed so far?
+ cmove -16(%rdi, %rdx, 1), %r8 # if not, use the second-to-last 8 bytes
+ cmove -16(%rsi, %rdx, 1), %r9 # as the high half of the final compare
+ bswap %r10 # compare in big endian
+ bswap %r11
+ bswap %r8
+ bswap %r9
+ sub %r11, %r10
+ sbb %r9, %r8
+ sbb %eax, %eax # -1 if less, 0 if greater or equal
+ or %r10, %r8 # nonzero if not equal
+ setnz %al # negative if <, 0 if =, 1 if >
+ ret
+END(timingsafe_memcmp)
+
+ .section .note.GNU-stack,"",%progbits
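For readers who prefer C, the 17+ byte path can be paraphrased like this: load the buffers in 8-byte words byte-swapped to big-endian so that word comparison agrees with bytewise comparison, let the final word overlap the previous one so the amount of work depends only on the length, and keep the first mismatching word pair by constant-time selection, which is what the CMOVE chain into %r8/%r9 does. A rough, simplified sketch of that idea (helper names invented here; assumes len >= 8, a little-endian host, and GCC/Clang builtins; it omits the final 128-bit SUB/SBB trick the assembly uses):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint64_t
load_be64(const unsigned char *p)
{
	uint64_t x;

	memcpy(&x, p, sizeof(x));	/* unaligned load */
	return (__builtin_bswap64(x));	/* compare in big endian */
}

static int
wide_cmp_sketch(const unsigned char *p1, const unsigned char *p2, size_t len)
{
	uint64_t a = load_be64(p1), b = load_be64(p2);
	size_t i;

	/* walk 8-byte words; the last word overlaps the previous one */
	for (i = 8; i < len; i += 8) {
		size_t off = i < len - 8 ? i : len - 8;
		uint64_t na = load_be64(p1 + off), nb = load_be64(p2 + off);
		/* all ones while no mismatch has been seen yet, so the
		 * first mismatching pair stays in a/b, like %r8/%r9 */
		uint64_t keep = -(uint64_t)(a == b);

		a ^= (a ^ na) & keep;
		b ^= (b ^ nb) & keep;
	}

	return ((a > b) - (a < b));
}

Unlike hand-written assembly, a compiler is free to turn these masks and comparisons back into data-dependent branches, which is one reason such routines end up in assembly in the first place.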