aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tim J. Robbins <tjr@FreeBSD.org> 2006-02-19 04:27:39 +0000
committer Tim J. Robbins <tjr@FreeBSD.org> 2006-02-19 04:27:39 +0000
commit 7c5f1a3fc555128eb995fd7f39deebae3f9866e5 (patch)
tree c1e00162b9211522a420826410361a43e2350dfa
parent c0d157d85fda873b81744e7a0a01ad494a4b1ad4 (diff)
download src-7c5f1a3fc555128eb995fd7f39deebae3f9866e5.tar.gz
src-7c5f1a3fc555128eb995fd7f39deebae3f9866e5.zip
Correctly locate the character preceding the matched string in -w
mode in non-UTF-8 multibyte locales (e.g. EUC, GB2312). PR: 91909
Notes
Notes: svn path=/head/; revision=155829
-rw-r--r-- gnu/usr.bin/grep/search.c 40
1 files changed, 35 insertions, 5 deletions
diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c
index a1951567e0d9..982d2c5ce098 100644
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@@ -524,11 +524,16 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
if (mb_cur_max > 1)
{
const char *s;
- int mr;
+ size_t mr;
wchar_t pwc;
+ /* Locate the start of the multibyte character
+ before the match position (== beg + start). */
if (using_utf8)
{
+ /* UTF-8 is a special case: scan backwards
+ until we find a 7-bit character or a
+ lead byte. */
s = beg + start - 1;
while (s > buf
&& (unsigned char) *s >= 0x80
@@ -536,15 +541,40 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
--s;
}
else
- s = last_char;
- mr = mbtowc (&pwc, s, beg + start - s);
- if (mr <= 0)
+ {
+ /* Scan forwards to find the start of the
+ last complete character before the
+ match position. */
+ size_t bytes_left = start - 1;
+ s = beg;
+ while (bytes_left > 0)
+ {
+ mr = mbrlen (s, bytes_left, &mbs);
+ if (mr == (size_t) -1 || mr == 0)
+ {
+ memset (&mbs, '\0', sizeof (mbs));
+ s++;
+ bytes_left--;
+ continue;
+ }
+ if (mr == (size_t) -2)
+ {
+ memset (&mbs, '\0', sizeof (mbs));
+ break;
+ }
+ s += mr;
+ bytes_left -= mr;
+ }
+ }
+ mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
+ if (mr == (size_t) -2 || mr == (size_t) -1 ||
+ mr == 0)
{
memset (&mbs, '\0', sizeof (mbstate_t));
lword_match = 1;
}
else if (!(iswalnum (pwc) || pwc == L'_')
- && mr == (int) (beg + start - s))
+ && mr == beg + start - s)
lword_match = 1;
}
else