aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyle Evans <kevans@FreeBSD.org>2019-12-10 19:16:00 +0000
committerKyle Evans <kevans@FreeBSD.org>2019-12-10 19:16:00 +0000
commit6e816d8711558c9a7daf7959448b0c99d1fd07d1 (patch)
tree60b38b9957d85ba0d1ad65e2573ac26f08cf28df
parent0d42317659f4125c2bf7f6cbbaca157aac86a397 (diff)
downloadsrc-6e816d8711558c9a7daf7959448b0c99d1fd07d1.tar.gz
src-6e816d8711558c9a7daf7959448b0c99d1fd07d1.zip
sed: process \r, \n, and \t
This is both reasonable and a common GNUism that a lot of ported software expects. Universally process \r, \n, and \t into carriage return, newline, and tab respectively. Newline still doesn't function in contexts where it can't (e.g. BRE), but we process it anyway rather than passing \n (an escaped ordinary character, whose interpretation is undefined) through to the underlying regex engine. Adding a --posix flag to disable these was considered, but sed.1 already declares this version of sed a superset of the POSIX specification, and this behavior is the most likely expected when one attempts to use one of these escape sequences in pattern space. This differs from pre-r197362 behavior in that we now honor the three arguably most common escape sequences used with sed(1), and we do so outside of character classes, too. Other escape sequences, like \s and \S, will come later when GNU extensions are added to libregex; sed will likely link against libregex by default, since the GNU extensions tend to be fairly unintrusive. PR: 229925 Reviewed by: bapt, emaste, pfg Differential Revision: https://reviews.freebsd.org/D22750
Notes
Notes: svn path=/head/; revision=355590
-rw-r--r--usr.bin/sed/compile.c54
-rw-r--r--usr.bin/sed/tests/regress.multitest.out/8.223
-rwxr-xr-xusr.bin/sed/tests/sed2_test.sh20
3 files changed, 69 insertions, 8 deletions
diff --git a/usr.bin/sed/compile.c b/usr.bin/sed/compile.c
index 4427faeff4a2..e03103420559 100644
--- a/usr.bin/sed/compile.c
+++ b/usr.bin/sed/compile.c
@@ -395,10 +395,21 @@ compile_delimited(char *p, char *d, int is_tr)
continue;
} else if (*p == '\\' && p[1] == '[') {
*d++ = *p++;
- } else if (*p == '\\' && p[1] == c)
+ } else if (*p == '\\' && p[1] == c) {
p++;
- else if (*p == '\\' && p[1] == 'n') {
- *d++ = '\n';
+ } else if (*p == '\\' &&
+ (p[1] == 'n' || p[1] == 'r' || p[1] == 't')) {
+ switch (p[1]) {
+ case 'n':
+ *d++ = '\n';
+ break;
+ case 'r':
+ *d++ = '\r';
+ break;
+ case 't':
+ *d++ = '\t';
+ break;
+ }
p += 2;
continue;
} else if (*p == '\\' && p[1] == '\\') {
@@ -428,13 +439,29 @@ compile_ccl(char **sp, char *t)
*t++ = *s++;
if (*s == ']')
*t++ = *s++;
- for (; *s && (*t = *s) != ']'; s++, t++)
+ for (; *s && (*t = *s) != ']'; s++, t++) {
if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
*++t = *++s, t++, s++;
for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
if ((c = *s) == '\0')
return NULL;
+ } else if (*s == '\\') {
+ switch (s[1]) {
+ case 'n':
+ *t = '\n';
+ s++;
+ break;
+ case 'r':
+ *t = '\r';
+ s++;
+ break;
+ case 't':
+ *t = '\t';
+ s++;
+ break;
+ }
}
+ }
return (*s == ']') ? *sp = ++s, ++t : NULL;
}
@@ -521,8 +548,23 @@ compile_subst(char *p, struct s_subst *s)
linenum, fname, *p);
if (s->maxbref < ref)
s->maxbref = ref;
- } else if (*p == '&' || *p == '\\')
- *sp++ = '\\';
+ } else {
+ switch (*p) {
+ case '&':
+ case '\\':
+ *sp++ = '\\';
+ break;
+ case 'n':
+ *p = '\n';
+ break;
+ case 'r':
+ *p = '\r';
+ break;
+ case 't':
+ *p = '\t';
+ break;
+ }
+ }
} else if (*p == c) {
if (*++p == '\0' && more) {
if (cu_fgets(lbuf, sizeof(lbuf), &more))
diff --git a/usr.bin/sed/tests/regress.multitest.out/8.22 b/usr.bin/sed/tests/regress.multitest.out/8.22
index 1191247b6d9a..c66dd65b0993 100644
--- a/usr.bin/sed/tests/regress.multitest.out/8.22
+++ b/usr.bin/sed/tests/regress.multitest.out/8.22
@@ -1,2 +1 @@
-1
-2
+1X2
diff --git a/usr.bin/sed/tests/sed2_test.sh b/usr.bin/sed/tests/sed2_test.sh
index 9acd628a8f1e..48e1c2b216f6 100755
--- a/usr.bin/sed/tests/sed2_test.sh
+++ b/usr.bin/sed/tests/sed2_test.sh
@@ -69,9 +69,29 @@ inplace_command_q_body()
atf_check -s not-exit:0 stat -q '.!'*
}
+atf_test_case escape_subst
+escape_subst_head()
+{
+ atf_set "descr" "Verify functional escaping of \\n, \\r, and \\t"
+}
+escape_subst_body()
+{
+ printf "a\nt\\\t\n\tb\n\t\tc\r\n" > a
+ tr -d '\r' < a > b
+ printf "a\tb c\rx\n" > c
+
+ atf_check -o 'inline:a\nt\\t\n' sed '/\t/d' a
+ atf_check -o 'inline:a\nt\\t\n b\n c\r\n' sed 's/\t/ /g' a
+ atf_check -o 'inline:a\nt\\t\n\t\tb\n\t\t\t\tc\r\n' sed 's/\t/\t\t/g' a
+ atf_check -o 'inline:a\nt\n\tb\n\t\tc\r\n' sed 's/\\t//g' a
+ atf_check -o file:b sed 's/\r//' a
+ atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
+}
+
atf_init_test_cases()
{
atf_add_test_case inplace_command_q
atf_add_test_case inplace_hardlink_src
atf_add_test_case inplace_symlink_src
+ atf_add_test_case escape_subst
}