aboutsummaryrefslogtreecommitdiff
path: root/lib/libc
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc')
-rw-r--r--lib/libc/amd64/string/memrchr.S224
-rw-r--r--lib/libc/stdio/vfscanf.c5
-rw-r--r--lib/libc/stdio/vfwscanf.c5
-rw-r--r--lib/libc/tests/stdio/sscanf_test.c25
-rw-r--r--lib/libc/tests/stdio/swscanf_test.c25
5 files changed, 162 insertions, 122 deletions
diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
index 4f6c5a238daa..80fb306af2a3 100644
--- a/lib/libc/amd64/string/memrchr.S
+++ b/lib/libc/amd64/string/memrchr.S
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2023 Robert Clausecker
+ * Copyright (c) 2023, 2025 Robert Clausecker <fuz@FreeBSD.org>
*/
#include <machine/asm.h>
@@ -16,150 +16,142 @@ ARCHFUNCS(memrchr)
ENDARCHFUNCS(memrchr)
ARCHENTRY(memrchr, scalar)
- xor %eax, %eax # prospective return value
- sub $4, %rdx # 4 bytes left to process?
- jb 1f
+ lea -1(%rdi, %rdx, 1), %rax # point to last char in buffer
+ sub $4, %rdx # 4 bytes left to process?
+ jb .Ltail
ALIGN_TEXT
-0: xor %r8, %r8
- lea 2(%rdi), %r10
- cmp %sil, 2(%rdi)
- cmovne %r8, %r10 # point to null if no match
+0: cmp %sil, (%rax) # match at last entry?
+ je 1f
- cmp %sil, (%rdi)
- cmove %rdi, %r8 # point to first char if match
+ cmp %sil, -1(%rax) # match at second to last entry?
+ je 2f
- lea 1(%rdi), %r9
- cmp %sil, 1(%rdi)
- cmovne %r8, %r9 # point to first result if no match in second
+ cmp %sil, -2(%rax) # match at third to last entry?
+ je 3f
- lea 3(%rdi), %r11
- cmp %sil, 3(%rdi)
- cmovne %r10, %r11
+ cmp %sil, -3(%rax) # match at fourth to last entry?
+ je 4f
- test %r11, %r11
- cmovz %r9, %r11 # take first pair match if none in second
+ sub $4, %rax
+ sub $4, %rdx
+ jae 0b
- test %r11, %r11
- cmovnz %r11, %rax # take match in current set if any
+.Ltail: cmp $-3, %edx # at least one character left to process?
+ jb .Lnotfound
- add $4, %rdi
- sub $4, %rdx
- jae 0b
+ cmp %sil, (%rax)
+ je 1f
-1: cmp $-3, %edx # a least one character left to process?
- jb 2f
+ cmp $-2, %edx # at least two characters left to process?
+ jb .Lnotfound
- cmp %sil, (%rdi)
- cmove %rdi, %rax
+ cmp %sil, -1(%rax)
+ je 2f
- lea 1(%rdi), %rcx
- cmp $-2, %edx # at least two characters left to process?
- jb 2f
+ cmp $-1, %edx # at least three characters left to process?
+ jb .Lnotfound
- cmp %sil, 1(%rdi)
- cmove %rcx, %rax
+ cmp %sil, -2(%rax)
+ je 3f
- lea 2(%rdi), %rcx
- cmp $-1, %edx # at least three character left to process?
- jb 2f
-
- cmp %sil, 2(%rdi)
- cmove %rcx, %rax
+.Lnotfound:
+ xor %eax, %eax
+ ret
-2: ret
+ /* match found -- adjust rax to point to matching byte */
+4: dec %rax
+3: dec %rax
+2: dec %rax
+1: ret
ARCHEND(memrchr, scalar)
ARCHENTRY(memrchr, baseline)
- movd %esi, %xmm4
- test %rdx, %rdx # empty buffer?
- jz .L0 # if yes, return immediately
+ test %rdx, %rdx # empty input?
+ je .Lnomatchb
+
+
+ lea (%rdi, %rdx, 1), %ecx # pointer to end of buffer
+ lea -1(%rdi, %rdx, 1), %rdx # pointer to last char in buffer
+ movd %esi, %xmm2
+ and $~0x1f, %rdx # pointer to final 32 buffer bytes
+ movdqa (%rdx), %xmm0 # load last 32 bytes
+ movdqa 16(%rdx), %xmm1
+
+ punpcklbw %xmm2, %xmm2 # c -> cc
- punpcklbw %xmm4, %xmm4 # c -> cc
- mov %edi, %ecx
- punpcklwd %xmm4, %xmm4 # cc -> cccc
- and $~0xf, %rdi # align source pointer
- pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc
- and $0xf, %ecx
- movdqa %xmm4, %xmm0
mov $-1, %r8d
- pcmpeqb (%rdi), %xmm0 # compare aligned head
- shl %cl, %r8d # mask of bytes in the head of the buffer
- pmovmskb %xmm0, %eax
+ neg %ecx
+ mov %r8d, %r9d
+ shr %cl, %r8d # mask with zeroes after the string
- sub $16, %rcx
- and %r8d, %eax # match mask
- add %rcx, %rdx # advance past head
- cmc
- jbe .Lrunt # did the string end in the buffer?
+ punpcklwd %xmm2, %xmm2 # cc -> cccc
- mov %rdi, %rsi # pointer to matching chunk
- add $16, %rdi
- sub $16, %rdx # enough left for another round?
- jbe 1f
+ mov %edi, %ecx
+ mov %r9d, %eax
+ shl %cl, %r9d # mask with zeroes before the string
- /* main loop unrolled twice */
- ALIGN_TEXT
-0: movdqa %xmm4, %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %r8d
+ pshufd $0, %xmm2, %xmm2 # cccc -> cccccccccccccccc
- cmp $16, %rdx # enough left for second chunk?
- jbe 2f
+ cmp %rdx, %rdi # tail is beginning of buffer?
+ cmovae %r9d, %eax # if yes, do combined head/tail processing
+ and %r8d, %eax # mak of bytes in tail part of string
- movdqa %xmm4, %xmm0
- pcmpeqb 16(%rdi), %xmm0
+ /* process tail */
+ pcmpeqb %xmm2, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %esi
pmovmskb %xmm0, %ecx
+ shl $16, %esi
+ or %esi, %ecx # locations of matches
+ and %ecx, %eax # any match inside buffer?
+ jnz .Lprecisematchb
- lea 16(%rdi), %r9
- test %ecx, %ecx # match found in second chunk?
- cmovz %r8d, %ecx # if not, use match data from first chunk
- cmovz %rdi, %r9
-
- test %ecx, %ecx # any match found?
- cmovnz %ecx, %eax # if yes, overwrite previously found match
- cmovnz %r9, %rsi
-
- add $32, %rdi # advance to next iteration
- sub $32, %rdx # advance to next chunks
- ja 0b
-
- /* process remaining 1--16 bytes */
-1: pcmpeqb (%rdi), %xmm4
- mov $0xffff, %r8d
- xor %ecx, %ecx
- sub %edx, %ecx # number of bytes to be masked out
- pmovmskb %xmm4, %r9d
- shr %cl, %r8d # mask of bytes to be kept in the buffer
- and %r9d, %r8d
- cmovnz %r8d, %eax
- cmovnz %rdi, %rsi
- bsr %eax, %eax
- lea (%rsi, %rax, 1), %rsi # pointer to match (or junk)
- cmovnz %rsi, %rax # if any match was found, return it
- ret
+ cmp %rdx, %rdi # did the buffer begin here?
+ jae .Lnomatchb # if yes, we are done
- /* end of chunk reached within first half iteration */
-2: test %r8d, %r8d # match in previous chunk?
- cmovnz %r8d, %eax # if yes, overwrite previous chunks
- cmovnz %rdi, %rsi
- add $16, %rdi # point to tail
- sub $16, %edx
- jmp 1b # handle tail the same otherwise
-
- /* runt: string ends within head, edx has negated amount of invalid head bytes */
-.Lrunt: mov $0xffff, %r8d
- xor %ecx, %ecx
- sub %edx, %ecx
- shr %cl, %r8d
- and %r8d, %eax
- bsr %eax, %eax
- lea (%rdi, %rax, 1), %rdi
- cmovnz %rdi, %rax
+ /* main loop */
+ ALIGN_TEXT
+0: movdqa -32(%rdx), %xmm0 # load previous string chunk
+ movdqa -16(%rdx), %xmm1
+ sub $32, %rdx # beginning of string reached?
+ cmp %rdx, %rdi
+ jae .Ltailb
+
+ pcmpeqb %xmm2, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ por %xmm1, %xmm0 # match in either half?
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jz 0b
+
+.Lmatchb:
+ pcmpeqb (%rdx), %xmm2 # redo comparison of first 16 bytes
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm2, %eax
+ shl $16, %ecx
+ or %ecx, %eax # location of matches
+
+.Lprecisematchb:
+ bsr %eax, %eax # find location of match
+ add %rdx, %rax # point to matching byte
ret
- /* empty buffer: return a null pointer */
-.L0: xor %eax, %eax
+.Ltailb:
+ pcmpeqb %xmm2, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # location of matches
+ and %r9d, %eax # mask out matches before buffer
+ bsr %eax, %edi # location of match
+ lea (%rdx, %rdi, 1), %rdx # pointer to match (if any)
+ cmovnz %rdx, %rax # point to match if present,
+ ret # else null pointer
+
+.Lnomatchb:
+ xor %eax, %eax # return null pointer
ret
ARCHEND(memrchr, baseline)
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index a678710e1ecb..89e9e843969f 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -318,10 +318,9 @@ parseint_fsm(int c, enum parseint_state *state, int *base)
case '0':
if (*state == begin || *state == havesign) {
*state = havezero;
- } else {
- *state = any;
+ return 1;
}
- return 1;
+ /* FALL THROUGH */
case '1':
case '2':
case '3':
diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c
index 57206a8407d5..7ca64eb37811 100644
--- a/lib/libc/stdio/vfwscanf.c
+++ b/lib/libc/stdio/vfwscanf.c
@@ -298,10 +298,9 @@ parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
case '0':
if (*state == begin || *state == havesign) {
*state = havezero;
- } else {
- *state = any;
+ return 1;
}
- return 1;
+ /* FALL THROUGH */
case '1':
case '2':
case '3':
diff --git a/lib/libc/tests/stdio/sscanf_test.c b/lib/libc/tests/stdio/sscanf_test.c
index e916873d38c3..e43292820eeb 100644
--- a/lib/libc/tests/stdio/sscanf_test.c
+++ b/lib/libc/tests/stdio/sscanf_test.c
@@ -68,6 +68,31 @@ static const struct sscanf_test_case {
{ "0e", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 14, 2 }, { 1, 0, 1 }, },
{ "0f", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 15, 2 }, { 1, 0, 1 }, },
{ "0x", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, },
+ // all digits with two leading zeroes
+ { "000", { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, },
+ { "001", { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, },
+ { "002", { 1, 0, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, },
+ { "003", { 1, 0, 2 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, },
+ { "004", { 1, 0, 2 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, },
+ { "005", { 1, 0, 2 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, },
+ { "006", { 1, 0, 2 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, },
+ { "007", { 1, 0, 2 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, },
+ { "008", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 8, 3 }, { 1, 8, 3 }, { 1, 0, 2 }, },
+ { "009", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 9, 3 }, { 1, 9, 3 }, { 1, 0, 2 }, },
+ { "00A", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, },
+ { "00B", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, },
+ { "00C", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, },
+ { "00D", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, },
+ { "00E", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, },
+ { "00F", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, },
+ { "00X", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, },
+ { "00a", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, },
+ { "00b", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, },
+ { "00c", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, },
+ { "00d", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, },
+ { "00e", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, },
+ { "00f", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, },
+ { "00x", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, },
// all digits with leading one
{ "10", { 1, 2, 2 }, { 1, 8, 2 }, { 1, 10, 2 }, { 1, 16, 2 }, { 1, 10, 2 }, },
{ "11", { 1, 3, 2 }, { 1, 9, 2 }, { 1, 11, 2 }, { 1, 17, 2 }, { 1, 11, 2 }, },
diff --git a/lib/libc/tests/stdio/swscanf_test.c b/lib/libc/tests/stdio/swscanf_test.c
index f7ad30b963a7..f0638081e16f 100644
--- a/lib/libc/tests/stdio/swscanf_test.c
+++ b/lib/libc/tests/stdio/swscanf_test.c
@@ -71,6 +71,31 @@ static const struct swscanf_test_case {
{ L"0e", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 14, 2 }, { 1, 0, 1 }, },
{ L"0f", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 15, 2 }, { 1, 0, 1 }, },
{ L"0x", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, },
+ // all digits with two leading zeroes
+ { L"000", { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, },
+ { L"001", { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, },
+ { L"002", { 1, 0, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, },
+ { L"003", { 1, 0, 2 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, },
+ { L"004", { 1, 0, 2 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, },
+ { L"005", { 1, 0, 2 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, },
+ { L"006", { 1, 0, 2 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, },
+ { L"007", { 1, 0, 2 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, },
+ { L"008", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 8, 3 }, { 1, 8, 3 }, { 1, 0, 2 }, },
+ { L"009", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 9, 3 }, { 1, 9, 3 }, { 1, 0, 2 }, },
+ { L"00A", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, },
+ { L"00B", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, },
+ { L"00C", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, },
+ { L"00D", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, },
+ { L"00E", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, },
+ { L"00F", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, },
+ { L"00X", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, },
+ { L"00a", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, },
+ { L"00b", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, },
+ { L"00c", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, },
+ { L"00d", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, },
+ { L"00e", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, },
+ { L"00f", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, },
+ { L"00x", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, },
// all digits with leading one
{ L"10", { 1, 2, 2 }, { 1, 8, 2 }, { 1, 10, 2 }, { 1, 16, 2 }, { 1, 10, 2 }, },
{ L"11", { 1, 3, 2 }, { 1, 9, 2 }, { 1, 11, 2 }, { 1, 17, 2 }, { 1, 11, 2 }, },