diff options
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/amd64/string/memrchr.S | 224 | ||||
-rw-r--r-- | lib/libc/stdio/vfscanf.c | 5 | ||||
-rw-r--r-- | lib/libc/stdio/vfwscanf.c | 5 | ||||
-rw-r--r-- | lib/libc/tests/stdio/sscanf_test.c | 25 | ||||
-rw-r--r-- | lib/libc/tests/stdio/swscanf_test.c | 25 |
5 files changed, 162 insertions, 122 deletions
diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S index 4f6c5a238daa..80fb306af2a3 100644 --- a/lib/libc/amd64/string/memrchr.S +++ b/lib/libc/amd64/string/memrchr.S @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2023 Robert Clausecker + * Copyright (c) 2023, 2025 Robert Clausecker <fuz@FreeBSD.org> */ #include <machine/asm.h> @@ -16,150 +16,142 @@ ARCHFUNCS(memrchr) ENDARCHFUNCS(memrchr) ARCHENTRY(memrchr, scalar) - xor %eax, %eax # prospective return value - sub $4, %rdx # 4 bytes left to process? - jb 1f + lea -1(%rdi, %rdx, 1), %rax # point to last char in buffer + sub $4, %rdx # 4 bytes left to process? + jb .Ltail ALIGN_TEXT -0: xor %r8, %r8 - lea 2(%rdi), %r10 - cmp %sil, 2(%rdi) - cmovne %r8, %r10 # point to null if no match +0: cmp %sil, (%rax) # match at last entry? + je 1f - cmp %sil, (%rdi) - cmove %rdi, %r8 # point to first char if match + cmp %sil, -1(%rax) # match at second to last entry? + je 2f - lea 1(%rdi), %r9 - cmp %sil, 1(%rdi) - cmovne %r8, %r9 # point to first result if no match in second + cmp %sil, -2(%rax) # match at third to last entry? + je 3f - lea 3(%rdi), %r11 - cmp %sil, 3(%rdi) - cmovne %r10, %r11 + cmp %sil, -3(%rax) # match at fourth to last entry? + je 4f - test %r11, %r11 - cmovz %r9, %r11 # take first pair match if none in second + sub $4, %rax + sub $4, %rdx + jae 0b - test %r11, %r11 - cmovnz %r11, %rax # take match in current set if any +.Ltail: cmp $-3, %edx # at least one character left to process? + jb .Lnotfound - add $4, %rdi - sub $4, %rdx - jae 0b + cmp %sil, (%rax) + je 1f -1: cmp $-3, %edx # a least one character left to process? - jb 2f + cmp $-2, %edx # at least two characters left to process? + jb .Lnotfound - cmp %sil, (%rdi) - cmove %rdi, %rax + cmp %sil, -1(%rax) + je 2f - lea 1(%rdi), %rcx - cmp $-2, %edx # at least two characters left to process? - jb 2f + cmp $-1, %edx # at least three characters left to process? + jb .Lnotfound - cmp %sil, 1(%rdi) - cmove %rcx, %rax + cmp %sil, -2(%rax) + je 3f - lea 2(%rdi), %rcx - cmp $-1, %edx # at least three character left to process? - jb 2f - - cmp %sil, 2(%rdi) - cmove %rcx, %rax +.Lnotfound: + xor %eax, %eax + ret -2: ret + /* match found -- adjust rax to point to matching byte */ +4: dec %rax +3: dec %rax +2: dec %rax +1: ret ARCHEND(memrchr, scalar) ARCHENTRY(memrchr, baseline) - movd %esi, %xmm4 - test %rdx, %rdx # empty buffer? - jz .L0 # if yes, return immediately + test %rdx, %rdx # empty input? + je .Lnomatchb + + + lea (%rdi, %rdx, 1), %ecx # pointer to end of buffer + lea -1(%rdi, %rdx, 1), %rdx # pointer to last char in buffer + movd %esi, %xmm2 + and $~0x1f, %rdx # pointer to final 32 buffer bytes + movdqa (%rdx), %xmm0 # load last 32 bytes + movdqa 16(%rdx), %xmm1 + + punpcklbw %xmm2, %xmm2 # c -> cc - punpcklbw %xmm4, %xmm4 # c -> cc - mov %edi, %ecx - punpcklwd %xmm4, %xmm4 # cc -> cccc - and $~0xf, %rdi # align source pointer - pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc - and $0xf, %ecx - movdqa %xmm4, %xmm0 mov $-1, %r8d - pcmpeqb (%rdi), %xmm0 # compare aligned head - shl %cl, %r8d # mask of bytes in the head of the buffer - pmovmskb %xmm0, %eax + neg %ecx + mov %r8d, %r9d + shr %cl, %r8d # mask with zeroes after the string - sub $16, %rcx - and %r8d, %eax # match mask - add %rcx, %rdx # advance past head - cmc - jbe .Lrunt # did the string end in the buffer? + punpcklwd %xmm2, %xmm2 # cc -> cccc - mov %rdi, %rsi # pointer to matching chunk - add $16, %rdi - sub $16, %rdx # enough left for another round? - jbe 1f + mov %edi, %ecx + mov %r9d, %eax + shl %cl, %r9d # mask with zeroes before the string - /* main loop unrolled twice */ - ALIGN_TEXT -0: movdqa %xmm4, %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %r8d + pshufd $0, %xmm2, %xmm2 # cccc -> cccccccccccccccc - cmp $16, %rdx # enough left for second chunk? - jbe 2f + cmp %rdx, %rdi # tail is beginning of buffer? + cmovae %r9d, %eax # if yes, do combined head/tail processing + and %r8d, %eax # mak of bytes in tail part of string - movdqa %xmm4, %xmm0 - pcmpeqb 16(%rdi), %xmm0 + /* process tail */ + pcmpeqb %xmm2, %xmm1 + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm1, %esi pmovmskb %xmm0, %ecx + shl $16, %esi + or %esi, %ecx # locations of matches + and %ecx, %eax # any match inside buffer? + jnz .Lprecisematchb - lea 16(%rdi), %r9 - test %ecx, %ecx # match found in second chunk? - cmovz %r8d, %ecx # if not, use match data from first chunk - cmovz %rdi, %r9 - - test %ecx, %ecx # any match found? - cmovnz %ecx, %eax # if yes, overwrite previously found match - cmovnz %r9, %rsi - - add $32, %rdi # advance to next iteration - sub $32, %rdx # advance to next chunks - ja 0b - - /* process remaining 1--16 bytes */ -1: pcmpeqb (%rdi), %xmm4 - mov $0xffff, %r8d - xor %ecx, %ecx - sub %edx, %ecx # number of bytes to be masked out - pmovmskb %xmm4, %r9d - shr %cl, %r8d # mask of bytes to be kept in the buffer - and %r9d, %r8d - cmovnz %r8d, %eax - cmovnz %rdi, %rsi - bsr %eax, %eax - lea (%rsi, %rax, 1), %rsi # pointer to match (or junk) - cmovnz %rsi, %rax # if any match was found, return it - ret + cmp %rdx, %rdi # did the buffer begin here? + jae .Lnomatchb # if yes, we are done - /* end of chunk reached within first half iteration */ -2: test %r8d, %r8d # match in previous chunk? - cmovnz %r8d, %eax # if yes, overwrite previous chunks - cmovnz %rdi, %rsi - add $16, %rdi # point to tail - sub $16, %edx - jmp 1b # handle tail the same otherwise - - /* runt: string ends within head, edx has negated amount of invalid head bytes */ -.Lrunt: mov $0xffff, %r8d - xor %ecx, %ecx - sub %edx, %ecx - shr %cl, %r8d - and %r8d, %eax - bsr %eax, %eax - lea (%rdi, %rax, 1), %rdi - cmovnz %rdi, %rax + /* main loop */ + ALIGN_TEXT +0: movdqa -32(%rdx), %xmm0 # load previous string chunk + movdqa -16(%rdx), %xmm1 + sub $32, %rdx # beginning of string reached? + cmp %rdx, %rdi + jae .Ltailb + + pcmpeqb %xmm2, %xmm0 + pcmpeqb %xmm2, %xmm1 + por %xmm1, %xmm0 # match in either half? + pmovmskb %xmm0, %eax + test %eax, %eax + jz 0b + +.Lmatchb: + pcmpeqb (%rdx), %xmm2 # redo comparison of first 16 bytes + pmovmskb %xmm1, %ecx + pmovmskb %xmm2, %eax + shl $16, %ecx + or %ecx, %eax # location of matches + +.Lprecisematchb: + bsr %eax, %eax # find location of match + add %rdx, %rax # point to matching byte ret - /* empty buffer: return a null pointer */ -.L0: xor %eax, %eax +.Ltailb: + pcmpeqb %xmm2, %xmm1 + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm1, %ecx + pmovmskb %xmm0, %eax + shl $16, %ecx + or %ecx, %eax # location of matches + and %r9d, %eax # mask out matches before buffer + bsr %eax, %edi # location of match + lea (%rdx, %rdi, 1), %rdx # pointer to match (if any) + cmovnz %rdx, %rax # point to match if present, + ret # else null pointer + +.Lnomatchb: + xor %eax, %eax # return null pointer ret ARCHEND(memrchr, baseline) diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c index a678710e1ecb..89e9e843969f 100644 --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -318,10 +318,9 @@ parseint_fsm(int c, enum parseint_state *state, int *base) case '0': if (*state == begin || *state == havesign) { *state = havezero; - } else { - *state = any; + return 1; } - return 1; + /* FALL THROUGH */ case '1': case '2': case '3': diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c index 57206a8407d5..7ca64eb37811 100644 --- a/lib/libc/stdio/vfwscanf.c +++ b/lib/libc/stdio/vfwscanf.c @@ -298,10 +298,9 @@ parseint_fsm(wchar_t c, enum parseint_state *state, int *base) case '0': if (*state == begin || *state == havesign) { *state = havezero; - } else { - *state = any; + return 1; } - return 1; + /* FALL THROUGH */ case '1': case '2': case '3': diff --git a/lib/libc/tests/stdio/sscanf_test.c b/lib/libc/tests/stdio/sscanf_test.c index e916873d38c3..e43292820eeb 100644 --- a/lib/libc/tests/stdio/sscanf_test.c +++ b/lib/libc/tests/stdio/sscanf_test.c @@ -68,6 +68,31 @@ static const struct sscanf_test_case { { "0e", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 14, 2 }, { 1, 0, 1 }, }, { "0f", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 15, 2 }, { 1, 0, 1 }, }, { "0x", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, }, + // all digits with two leading zeroes + { "000", { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, }, + { "001", { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, }, + { "002", { 1, 0, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, }, + { "003", { 1, 0, 2 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, }, + { "004", { 1, 0, 2 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, }, + { "005", { 1, 0, 2 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, }, + { "006", { 1, 0, 2 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, }, + { "007", { 1, 0, 2 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, }, + { "008", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 8, 3 }, { 1, 8, 3 }, { 1, 0, 2 }, }, + { "009", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 9, 3 }, { 1, 9, 3 }, { 1, 0, 2 }, }, + { "00A", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, }, + { "00B", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, }, + { "00C", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, }, + { "00D", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, }, + { "00E", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, }, + { "00F", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, }, + { "00X", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, }, + { "00a", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, }, + { "00b", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, }, + { "00c", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, }, + { "00d", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, }, + { "00e", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, }, + { "00f", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, }, + { "00x", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, }, // all digits with leading one { "10", { 1, 2, 2 }, { 1, 8, 2 }, { 1, 10, 2 }, { 1, 16, 2 }, { 1, 10, 2 }, }, { "11", { 1, 3, 2 }, { 1, 9, 2 }, { 1, 11, 2 }, { 1, 17, 2 }, { 1, 11, 2 }, }, diff --git a/lib/libc/tests/stdio/swscanf_test.c b/lib/libc/tests/stdio/swscanf_test.c index f7ad30b963a7..f0638081e16f 100644 --- a/lib/libc/tests/stdio/swscanf_test.c +++ b/lib/libc/tests/stdio/swscanf_test.c @@ -71,6 +71,31 @@ static const struct swscanf_test_case { { L"0e", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 14, 2 }, { 1, 0, 1 }, }, { L"0f", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 15, 2 }, { 1, 0, 1 }, }, { L"0x", { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, { 1, 0, 1 }, }, + // all digits with two leading zeroes + { L"000", { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, { 1, 0, 3 }, }, + { L"001", { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, { 1, 1, 3 }, }, + { L"002", { 1, 0, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, { 1, 2, 3 }, }, + { L"003", { 1, 0, 2 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, { 1, 3, 3 }, }, + { L"004", { 1, 0, 2 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, { 1, 4, 3 }, }, + { L"005", { 1, 0, 2 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, { 1, 5, 3 }, }, + { L"006", { 1, 0, 2 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, { 1, 6, 3 }, }, + { L"007", { 1, 0, 2 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, { 1, 7, 3 }, }, + { L"008", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 8, 3 }, { 1, 8, 3 }, { 1, 0, 2 }, }, + { L"009", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 9, 3 }, { 1, 9, 3 }, { 1, 0, 2 }, }, + { L"00A", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, }, + { L"00B", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, }, + { L"00C", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, }, + { L"00D", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, }, + { L"00E", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, }, + { L"00F", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, }, + { L"00X", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, }, + { L"00a", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 10, 3 }, { 1, 0, 2 }, }, + { L"00b", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 11, 3 }, { 1, 0, 2 }, }, + { L"00c", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 12, 3 }, { 1, 0, 2 }, }, + { L"00d", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 13, 3 }, { 1, 0, 2 }, }, + { L"00e", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 14, 3 }, { 1, 0, 2 }, }, + { L"00f", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 15, 3 }, { 1, 0, 2 }, }, + { L"00x", { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, { 1, 0, 2 }, }, // all digits with leading one { L"10", { 1, 2, 2 }, { 1, 8, 2 }, { 1, 10, 2 }, { 1, 16, 2 }, { 1, 10, 2 }, }, { L"11", { 1, 3, 2 }, { 1, 9, 2 }, { 1, 11, 2 }, { 1, 17, 2 }, { 1, 11, 2 }, }, |