diff options
Diffstat (limited to 'ex/ex_subst.c')
-rw-r--r-- | ex/ex_subst.c | 1459 |
1 files changed, 1459 insertions, 0 deletions
diff --git a/ex/ex_subst.c b/ex/ex_subst.c new file mode 100644 index 000000000000..0ebb81dd58e7 --- /dev/null +++ b/ex/ex_subst.c @@ -0,0 +1,1459 @@ +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1992, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + * + * See the LICENSE file for redistribution information. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)ex_subst.c 10.37 (Berkeley) 9/15/96"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <bitstring.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "../common/common.h" +#include "../vi/vi.h" + +#define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ +#define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ + +static int re_conv __P((SCR *, char **, size_t *, int *)); +static int re_cscope_conv __P((SCR *, char **, size_t *, int *)); +static int re_sub __P((SCR *, + char *, char **, size_t *, size_t *, regmatch_t [10])); +static int re_tag_conv __P((SCR *, char **, size_t *, int *)); +static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int)); + +/* + * ex_s -- + * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] + * + * Substitute on lines matching a pattern. + * + * PUBLIC: int ex_s __P((SCR *, EXCMD *)); + */ +int +ex_s(sp, cmdp) + SCR *sp; + EXCMD *cmdp; +{ + regex_t *re; + size_t blen, len; + u_int flags; + int delim; + char *bp, *ptrn, *rep, *p, *t; + + /* + * Skip leading white space. + * + * !!! + * Historic vi allowed any non-alphanumeric to serve as the + * substitution command delimiter. + * + * !!! + * If the arguments are empty, it's the same as &, i.e. we + * repeat the last substitution. + */ + if (cmdp->argc == 0) + goto subagain; + for (p = cmdp->argv[0]->bp, + len = cmdp->argv[0]->len; len > 0; --len, ++p) { + if (!isblank(*p)) + break; + } + if (len == 0) +subagain: return (ex_subagain(sp, cmdp)); + + delim = *p++; + if (isalnum(delim) || delim == '\\') + return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); + + /* + * !!! + * The full-blown substitute command reset the remembered + * state of the 'c' and 'g' suffices. + */ + sp->c_suffix = sp->g_suffix = 0; + + /* + * Get the pattern string, toss escaping characters. + * + * !!! + * Historic vi accepted any of the following forms: + * + * :s/abc/def/ change "abc" to "def" + * :s/abc/def change "abc" to "def" + * :s/abc/ delete "abc" + * :s/abc delete "abc" + * + * QUOTING NOTE: + * + * Only toss an escaping character if it escapes a delimiter. + * This means that "s/A/\\\\f" replaces "A" with "\\f". It + * would be nice to be more regular, i.e. for each layer of + * escaping a single escaping character is removed, but that's + * not how the historic vi worked. + */ + for (ptrn = t = p;;) { + if (p[0] == '\0' || p[0] == delim) { + if (p[0] == delim) + ++p; + /* + * !!! + * Nul terminate the pattern string -- it's passed + * to regcomp which doesn't understand anything else. + */ + *t = '\0'; + break; + } + if (p[0] == '\\') + if (p[1] == delim) + ++p; + else if (p[1] == '\\') + *t++ = *p++; + *t++ = *p++; + } + + /* + * If the pattern string is empty, use the last RE (not just the + * last substitution RE). + */ + if (*ptrn == '\0') { + if (sp->re == NULL) { + ex_emsg(sp, NULL, EXM_NOPREVRE); + return (1); + } + + /* Re-compile the RE if necessary. */ + if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, + sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) + return (1); + flags = 0; + } else { + /* + * !!! + * Compile the RE. Historic practice is that substitutes set + * the search direction as well as both substitute and search + * RE's. We compile the RE twice, as we don't want to bother + * ref counting the pattern string and (opaque) structure. + */ + if (re_compile(sp, ptrn, t - ptrn, + &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) + return (1); + if (re_compile(sp, ptrn, t - ptrn, + &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) + return (1); + + flags = SUB_FIRST; + sp->searchdir = FORWARD; + } + re = &sp->re_c; + + /* + * Get the replacement string. + * + * The special character & (\& if O_MAGIC not set) matches the + * entire RE. No handling of & is required here, it's done by + * re_sub(). + * + * The special character ~ (\~ if O_MAGIC not set) inserts the + * previous replacement string into this replacement string. + * Count ~'s to figure out how much space we need. We could + * special case nonexistent last patterns or whether or not + * O_MAGIC is set, but it's probably not worth the effort. + * + * QUOTING NOTE: + * + * Only toss an escaping character if it escapes a delimiter or + * if O_MAGIC is set and it escapes a tilde. + * + * !!! + * If the entire replacement pattern is "%", then use the last + * replacement pattern. This semantic was added to vi in System + * V and then percolated elsewhere, presumably around the time + * that it was added to their version of ed(1). + */ + if (p[0] == '\0' || p[0] == delim) { + if (p[0] == delim) + ++p; + if (sp->repl != NULL) + free(sp->repl); + sp->repl = NULL; + sp->repl_len = 0; + } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) + p += p[1] == delim ? 2 : 1; + else { + for (rep = p, len = 0; + p[0] != '\0' && p[0] != delim; ++p, ++len) + if (p[0] == '~') + len += sp->repl_len; + GET_SPACE_RET(sp, bp, blen, len); + for (t = bp, len = 0, p = rep;;) { + if (p[0] == '\0' || p[0] == delim) { + if (p[0] == delim) + ++p; + break; + } + if (p[0] == '\\') { + if (p[1] == delim) + ++p; + else if (p[1] == '\\') { + *t++ = *p++; + ++len; + } else if (p[1] == '~') { + ++p; + if (!O_ISSET(sp, O_MAGIC)) + goto tilde; + } + } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { +tilde: ++p; + memcpy(t, sp->repl, sp->repl_len); + t += sp->repl_len; + len += sp->repl_len; + continue; + } + *t++ = *p++; + ++len; + } + if ((sp->repl_len = len) != 0) { + if (sp->repl != NULL) + free(sp->repl); + if ((sp->repl = malloc(len)) == NULL) { + msgq(sp, M_SYSERR, NULL); + FREE_SPACE(sp, bp, blen); + return (1); + } + memcpy(sp->repl, bp, len); + } + FREE_SPACE(sp, bp, blen); + } + return (s(sp, cmdp, p, re, flags)); +} + +/* + * ex_subagain -- + * [line [,line]] & [cgr] [count] [#lp]] + * + * Substitute using the last substitute RE and replacement pattern. + * + * PUBLIC: int ex_subagain __P((SCR *, EXCMD *)); + */ +int +ex_subagain(sp, cmdp) + SCR *sp; + EXCMD *cmdp; +{ + if (sp->subre == NULL) { + ex_emsg(sp, NULL, EXM_NOPREVRE); + return (1); + } + if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, + sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) + return (1); + return (s(sp, + cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); +} + +/* + * ex_subtilde -- + * [line [,line]] ~ [cgr] [count] [#lp]] + * + * Substitute using the last RE and last substitute replacement pattern. + * + * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *)); + */ +int +ex_subtilde(sp, cmdp) + SCR *sp; + EXCMD *cmdp; +{ + if (sp->re == NULL) { + ex_emsg(sp, NULL, EXM_NOPREVRE); + return (1); + } + if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, + sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) + return (1); + return (s(sp, + cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); +} + +/* + * s -- + * Do the substitution. This stuff is *really* tricky. There are lots of + * special cases, and general nastiness. Don't mess with it unless you're + * pretty confident. + * + * The nasty part of the substitution is what happens when the replacement + * string contains newlines. It's a bit tricky -- consider the information + * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is + * to build a set of newline offsets which we use to break the line up later, + * when the replacement is done. Don't change it unless you're *damned* + * confident. + */ +#define NEEDNEWLINE(sp) { \ + if (sp->newl_len == sp->newl_cnt) { \ + sp->newl_len += 25; \ + REALLOC(sp, sp->newl, size_t *, \ + sp->newl_len * sizeof(size_t)); \ + if (sp->newl == NULL) { \ + sp->newl_len = 0; \ + return (1); \ + } \ + } \ +} + +#define BUILD(sp, l, len) { \ + if (lbclen + (len) > lblen) { \ + lblen += MAX(lbclen + (len), 256); \ + REALLOC(sp, lb, char *, lblen); \ + if (lb == NULL) { \ + lbclen = 0; \ + return (1); \ + } \ + } \ + memcpy(lb + lbclen, l, len); \ + lbclen += len; \ +} + +#define NEEDSP(sp, len, pnt) { \ + if (lbclen + (len) > lblen) { \ + lblen += MAX(lbclen + (len), 256); \ + REALLOC(sp, lb, char *, lblen); \ + if (lb == NULL) { \ + lbclen = 0; \ + return (1); \ + } \ + pnt = lb + lbclen; \ + } \ +} + +static int +s(sp, cmdp, s, re, flags) + SCR *sp; + EXCMD *cmdp; + char *s; + regex_t *re; + u_int flags; +{ + EVENT ev; + MARK from, to; + TEXTH tiq; + recno_t elno, lno, slno; + regmatch_t match[10]; + size_t blen, cnt, last, lbclen, lblen, len, llen; + size_t offset, saved_offset, scno; + int cflag, lflag, nflag, pflag, rflag; + int didsub, do_eol_match, eflags, empty_ok, eval; + int linechanged, matched, quit, rval; + char *bp, *lb; + + NEEDFILE(sp, cmdp); + + slno = sp->lno; + scno = sp->cno; + + /* + * !!! + * Historically, the 'g' and 'c' suffices were always toggled as flags, + * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was + * not set, they were initialized to 0 for all substitute commands. If + * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user + * specified substitute/replacement patterns (see ex_s()). + */ + if (!O_ISSET(sp, O_EDCOMPATIBLE)) + sp->c_suffix = sp->g_suffix = 0; + + /* + * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but + * it only displayed the last change. I'd disallow them, but they are + * useful in combination with the [v]global commands. In the current + * model the problem is combining them with the 'c' flag -- the screen + * would have to flip back and forth between the confirm screen and the + * ex print screen, which would be pretty awful. We do display all + * changes, though, for what that's worth. + * + * !!! + * Historic vi was fairly strict about the order of "options", the + * count, and "flags". I'm somewhat fuzzy on the difference between + * options and flags, anyway, so this is a simpler approach, and we + * just take it them in whatever order the user gives them. (The ex + * usage statement doesn't reflect this.) + */ + cflag = lflag = nflag = pflag = rflag = 0; + if (s == NULL) + goto noargs; + for (lno = OOBLNO; *s != '\0'; ++s) + switch (*s) { + case ' ': + case '\t': + continue; + case '+': + ++cmdp->flagoff; + break; + case '-': + --cmdp->flagoff; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (lno != OOBLNO) + goto usage; + errno = 0; + lno = strtoul(s, &s, 10); + if (*s == '\0') /* Loop increment correction. */ + --s; + if (errno == ERANGE) { + if (lno == LONG_MAX) + msgq(sp, M_ERR, "153|Count overflow"); + else if (lno == LONG_MIN) + msgq(sp, M_ERR, "154|Count underflow"); + else + msgq(sp, M_SYSERR, NULL); + return (1); + } + /* + * In historic vi, the count was inclusive from the + * second address. + */ + cmdp->addr1.lno = cmdp->addr2.lno; + cmdp->addr2.lno += lno - 1; + if (!db_exist(sp, cmdp->addr2.lno) && + db_last(sp, &cmdp->addr2.lno)) + return (1); + break; + case '#': + nflag = 1; + break; + case 'c': + sp->c_suffix = !sp->c_suffix; + + /* Ex text structure initialization. */ + if (F_ISSET(sp, SC_EX)) { + memset(&tiq, 0, sizeof(TEXTH)); + CIRCLEQ_INIT(&tiq); + } + break; + case 'g': + sp->g_suffix = !sp->g_suffix; + break; + case 'l': + lflag = 1; + break; + case 'p': + pflag = 1; + break; + case 'r': + if (LF_ISSET(SUB_FIRST)) { + msgq(sp, M_ERR, + "155|Regular expression specified; r flag meaningless"); + return (1); + } + if (!F_ISSET(sp, SC_RE_SEARCH)) { + ex_emsg(sp, NULL, EXM_NOPREVRE); + return (1); + } + rflag = 1; + re = &sp->re_c; + break; + default: + goto usage; + } + + if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) { +usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); + return (1); + } + +noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { + msgq(sp, M_ERR, +"156|The #, l and p flags may not be combined with the c flag in vi mode"); + return (1); + } + + /* + * bp: if interactive, line cache + * blen: if interactive, line cache length + * lb: build buffer pointer. + * lbclen: current length of built buffer. + * lblen; length of build buffer. + */ + bp = lb = NULL; + blen = lbclen = lblen = 0; + + /* For each line... */ + for (matched = quit = 0, lno = cmdp->addr1.lno, + elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { + + /* Someone's unhappy, time to stop. */ + if (INTERRUPTED(sp)) + break; + + /* Get the line. */ + if (db_get(sp, lno, DBG_FATAL, &s, &llen)) + goto err; + + /* + * Make a local copy if doing confirmation -- when calling + * the confirm routine we're likely to lose the cached copy. + */ + if (sp->c_suffix) { + if (bp == NULL) { + GET_SPACE_RET(sp, bp, blen, llen); + } else + ADD_SPACE_RET(sp, bp, blen, llen); + memcpy(bp, s, llen); + s = bp; + } + + /* Start searching from the beginning. */ + offset = 0; + len = llen; + + /* Reset the build buffer offset. */ + lbclen = 0; + + /* Reset empty match flag. */ + empty_ok = 1; + + /* + * We don't want to have to do a setline if the line didn't + * change -- keep track of whether or not this line changed. + * If doing confirmations, don't want to keep setting the + * line if change is refused -- keep track of substitutions. + */ + didsub = linechanged = 0; + + /* New line, do an EOL match. */ + do_eol_match = 1; + + /* It's not nul terminated, but we pretend it is. */ + eflags = REG_STARTEND; + + /* + * The search area is from s + offset to the EOL. + * + * Generally, match[0].rm_so is the offset of the start + * of the match from the start of the search, and offset + * is the offset of the start of the last search. + */ +nextmatch: match[0].rm_so = 0; + match[0].rm_eo = len; + + /* Get the next match. */ + eval = regexec(re, (char *)s + offset, 10, match, eflags); + + /* + * There wasn't a match or if there was an error, deal with + * it. If there was a previous match in this line, resolve + * the changes into the database. Otherwise, just move on. + */ + if (eval == REG_NOMATCH) + goto endmatch; + if (eval != 0) { + re_error(sp, eval, re); + goto err; + } + matched = 1; + + /* Only the first search can match an anchored expression. */ + eflags |= REG_NOTBOL; + + /* + * !!! + * It's possible to match 0-length strings -- for example, the + * command s;a*;X;, when matched against the string "aabb" will + * result in "XbXbX", i.e. the matches are "aa", the space + * between the b's and the space between the b's and the end of + * the string. There is a similar space between the beginning + * of the string and the a's. The rule that we use (because vi + * historically used it) is that any 0-length match, occurring + * immediately after a match, is ignored. Otherwise, the above + * example would have resulted in "XXbXbX". Another example is + * incorrectly using " *" to replace groups of spaces with one + * space. + * + * The way we do this is that if we just had a successful match, + * the starting offset does not skip characters, and the match + * is empty, ignore the match and move forward. If there's no + * more characters in the string, we were attempting to match + * after the last character, so quit. + */ + if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { + empty_ok = 1; + if (len == 0) + goto endmatch; + BUILD(sp, s + offset, 1) + ++offset; + --len; + goto nextmatch; + } + + /* Confirm change. */ + if (sp->c_suffix) { + /* + * Set the cursor position for confirmation. Note, + * if we matched on a '$', the cursor may be past + * the end of line. + */ + from.lno = to.lno = lno; + from.cno = match[0].rm_so + offset; + to.cno = match[0].rm_eo + offset; + /* + * Both ex and vi have to correct for a change before + * the first character in the line. + */ + if (llen == 0) + from.cno = to.cno = 0; + if (F_ISSET(sp, SC_VI)) { + /* + * Only vi has to correct for a change after + * the last character in the line. + * + * XXX + * It would be nice to change the vi code so + * that we could display a cursor past EOL. + */ + if (to.cno >= llen) + to.cno = llen - 1; + if (from.cno >= llen) + from.cno = llen - 1; + + sp->lno = from.lno; + sp->cno = from.cno; + if (vs_refresh(sp, 1)) + goto err; + + vs_update(sp, msg_cat(sp, + "169|Confirm change? [n]", NULL), NULL); + + if (v_event_get(sp, &ev, 0, 0)) + goto err; + switch (ev.e_event) { + case E_CHARACTER: + break; + case E_EOF: + case E_ERR: + case E_INTERRUPT: + goto lquit; + default: + v_event_err(sp, &ev); + goto lquit; + } + } else { + if (ex_print(sp, cmdp, &from, &to, 0) || + ex_scprint(sp, &from, &to)) + goto lquit; + if (ex_txt(sp, &tiq, 0, TXT_CR)) + goto err; + ev.e_c = tiq.cqh_first->lb[0]; + } + + switch (ev.e_c) { + case CH_YES: + break; + default: + case CH_NO: + didsub = 0; + BUILD(sp, s +offset, match[0].rm_eo); + goto skip; + case CH_QUIT: + /* Set the quit/interrupted flags. */ +lquit: quit = 1; + F_SET(sp->gp, G_INTERRUPTED); + + /* + * Resolve any changes, then return to (and + * exit from) the main loop. + */ + goto endmatch; + } + } + + /* + * Set the cursor to the last position changed, converting + * from 1-based to 0-based. + */ + sp->lno = lno; + sp->cno = match[0].rm_so; + + /* Copy the bytes before the match into the build buffer. */ + BUILD(sp, s + offset, match[0].rm_so); + + /* Substitute the matching bytes. */ + didsub = 1; + if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) + goto err; + + /* Set the change flag so we know this line was modified. */ + linechanged = 1; + + /* Move past the matched bytes. */ +skip: offset += match[0].rm_eo; + len -= match[0].rm_eo; + + /* A match cannot be followed by an empty pattern. */ + empty_ok = 0; + + /* + * If doing a global change with confirmation, we have to + * update the screen. The basic idea is to store the line + * so the screen update routines can find it, and restart. + */ + if (didsub && sp->c_suffix && sp->g_suffix) { + /* + * The new search offset will be the end of the + * modified line. + */ + saved_offset = lbclen; + + /* Copy the rest of the line. */ + if (len) + BUILD(sp, s + offset, len) + + /* Set the new offset. */ + offset = saved_offset; + + /* Store inserted lines, adjusting the build buffer. */ + last = 0; + if (sp->newl_cnt) { + for (cnt = 0; + cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { + if (db_insert(sp, lno, + lb + last, sp->newl[cnt] - last)) + goto err; + last = sp->newl[cnt] + 1; + ++sp->rptlines[L_ADDED]; + } + lbclen -= last; + offset -= last; + sp->newl_cnt = 0; + } + + /* Store and retrieve the line. */ + if (db_set(sp, lno, lb + last, lbclen)) + goto err; + if (db_get(sp, lno, DBG_FATAL, &s, &llen)) + goto err; + ADD_SPACE_RET(sp, bp, blen, llen) + memcpy(bp, s, llen); + s = bp; + len = llen - offset; + + /* Restart the build. */ + lbclen = 0; + BUILD(sp, s, offset); + + /* + * If we haven't already done the after-the-string + * match, do one. Set REG_NOTEOL so the '$' pattern + * only matches once. + */ + if (!do_eol_match) + goto endmatch; + if (offset == len) { + do_eol_match = 0; + eflags |= REG_NOTEOL; + } + goto nextmatch; + } + + /* + * If it's a global: + * + * If at the end of the string, do a test for the after + * the string match. Set REG_NOTEOL so the '$' pattern + * only matches once. + */ + if (sp->g_suffix && do_eol_match) { + if (len == 0) { + do_eol_match = 0; + eflags |= REG_NOTEOL; + } + goto nextmatch; + } + +endmatch: if (!linechanged) + continue; + + /* Copy any remaining bytes into the build buffer. */ + if (len) + BUILD(sp, s + offset, len) + + /* Store inserted lines, adjusting the build buffer. */ + last = 0; + if (sp->newl_cnt) { + for (cnt = 0; + cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { + if (db_insert(sp, + lno, lb + last, sp->newl[cnt] - last)) + goto err; + last = sp->newl[cnt] + 1; + ++sp->rptlines[L_ADDED]; + } + lbclen -= last; + sp->newl_cnt = 0; + } + + /* Store the changed line. */ + if (db_set(sp, lno, lb + last, lbclen)) + goto err; + + /* Update changed line counter. */ + if (sp->rptlchange != lno) { + sp->rptlchange = lno; + ++sp->rptlines[L_CHANGED]; + } + + /* + * !!! + * Display as necessary. Historic practice is to only + * display the last line of a line split into multiple + * lines. + */ + if (lflag || nflag || pflag) { + from.lno = to.lno = lno; + from.cno = to.cno = 0; + if (lflag) + (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); + if (nflag) + (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); + if (pflag) + (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); + } + } + + /* + * !!! + * Historically, vi attempted to leave the cursor at the same place if + * the substitution was done at the current cursor position. Otherwise + * it moved it to the first non-blank of the last line changed. There + * were some problems: for example, :s/$/foo/ with the cursor on the + * last character of the line left the cursor on the last character, or + * the & command with multiple occurrences of the matching string in the + * line usually left the cursor in a fairly random position. + * + * We try to do the same thing, with the exception that if the user is + * doing substitution with confirmation, we move to the last line about + * which the user was consulted, as opposed to the last line that they + * actually changed. This prevents a screen flash if the user doesn't + * change many of the possible lines. + */ + if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { + sp->cno = 0; + (void)nonblank(sp, sp->lno, &sp->cno); + } + + /* + * If not in a global command, and nothing matched, say so. + * Else, if none of the lines displayed, put something up. + */ + rval = 0; + if (!matched) { + if (!F_ISSET(sp, SC_EX_GLOBAL)) { + msgq(sp, M_ERR, "157|No match found"); + goto err; + } + } else if (!lflag && !nflag && !pflag) + F_SET(cmdp, E_AUTOPRINT); + + if (0) { +err: rval = 1; + } + + if (bp != NULL) + FREE_SPACE(sp, bp, blen); + if (lb != NULL) + free(lb); + return (rval); +} + +/* + * re_compile -- + * Compile the RE. + * + * PUBLIC: int re_compile __P((SCR *, + * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int)); + */ +int +re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags) + SCR *sp; + char *ptrn, **ptrnp; + size_t plen, *lenp; + regex_t *rep; + u_int flags; +{ + size_t len; + int reflags, replaced, rval; + char *p; + + /* Set RE flags. */ + reflags = 0; + if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { + if (O_ISSET(sp, O_EXTENDED)) + reflags |= REG_EXTENDED; + if (O_ISSET(sp, O_IGNORECASE)) + reflags |= REG_ICASE; + if (O_ISSET(sp, O_ICLOWER)) { + for (p = ptrn, len = plen; len > 0; ++p, --len) + if (isupper(*p)) + break; + if (len == 0) + reflags |= REG_ICASE; + } + } + + /* If we're replacing a saved value, clear the old one. */ + if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { + regfree(&sp->re_c); + F_CLR(sp, SC_RE_SEARCH); + } + if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { + regfree(&sp->subre_c); + F_CLR(sp, SC_RE_SUBST); + } + + /* + * If we're saving the string, it's a pattern we haven't seen before, + * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for + * later recompilation. Free any previously saved value. + */ + if (ptrnp != NULL) { + if (LF_ISSET(RE_C_CSCOPE)) { + if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) + return (1); + /* + * XXX + * Currently, the match-any-<blank> expression used in + * re_cscope_conv() requires extended RE's. This may + * not be right or safe. + */ + reflags |= REG_EXTENDED; + } else if (LF_ISSET(RE_C_TAG)) { + if (re_tag_conv(sp, &ptrn, &plen, &replaced)) + return (1); + } else + if (re_conv(sp, &ptrn, &plen, &replaced)) + return (1); + + /* Discard previous pattern. */ + if (*ptrnp != NULL) { + free(*ptrnp); + *ptrnp = NULL; + } + if (lenp != NULL) + *lenp = plen; + + /* + * Copy the string into allocated memory. + * + * XXX + * Regcomp isn't 8-bit clean, so the pattern is nul-terminated + * for now. There's just no other solution. + */ + MALLOC(sp, *ptrnp, char *, plen + 1); + if (*ptrnp != NULL) { + memcpy(*ptrnp, ptrn, plen); + (*ptrnp)[plen] = '\0'; + } + + /* Free up conversion-routine-allocated memory. */ + if (replaced) + FREE_SPACE(sp, ptrn, 0); + + if (*ptrnp == NULL) + return (1); + + ptrn = *ptrnp; + } + + /* + * XXX + * Regcomp isn't 8-bit clean, so we just lost if the pattern + * contained a nul. Bummer! + */ + if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { + if (!LF_ISSET(RE_C_SILENT)) + re_error(sp, rval, rep); + return (1); + } + + if (LF_ISSET(RE_C_SEARCH)) + F_SET(sp, SC_RE_SEARCH); + if (LF_ISSET(RE_C_SUBST)) + F_SET(sp, SC_RE_SUBST); + + return (0); +} + +/* + * re_conv -- + * Convert vi's regular expressions into something that the + * the POSIX 1003.2 RE functions can handle. + * + * There are three conversions we make to make vi's RE's (specifically + * the global, search, and substitute patterns) work with POSIX RE's. + * + * 1: If O_MAGIC is not set, strip backslashes from the magic character + * set (.[*~) that have them, and add them to the ones that don't. + * 2: If O_MAGIC is not set, the string "\~" is replaced with the text + * from the last substitute command's replacement string. If O_MAGIC + * is set, it's the string "~". + * 3: The pattern \<ptrn\> does "word" searches, convert it to use the + * new RE escapes. + * + * !!!/XXX + * This doesn't exactly match the historic behavior of vi because we do + * the ~ substitution before calling the RE engine, so magic characters + * in the replacement string will be expanded by the RE engine, and they + * weren't historically. It's a bug. + */ +static int +re_conv(sp, ptrnp, plenp, replacedp) + SCR *sp; + char **ptrnp; + size_t *plenp; + int *replacedp; +{ + size_t blen, len, needlen; + int magic; + char *bp, *p, *t; + + /* + * First pass through, we figure out how much space we'll need. + * We do it in two passes, on the grounds that most of the time + * the user is doing a search and won't have magic characters. + * That way we can skip most of the memory allocation and copies. + */ + magic = 0; + for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) + switch (*p) { + case '\\': + if (len > 1) { + --len; + switch (*++p) { + case '<': + magic = 1; + needlen += sizeof(RE_WSTART); + break; + case '>': + magic = 1; + needlen += sizeof(RE_WSTOP); + break; + case '~': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += sp->repl_len; + } + break; + case '.': + case '[': + case '*': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += 1; + } + break; + default: + needlen += 2; + } + } else + needlen += 1; + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += sp->repl_len; + } + break; + case '.': + case '[': + case '*': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += 2; + } + break; + default: + needlen += 1; + break; + } + + if (!magic) { + *replacedp = 0; + return (0); + } + + /* Get enough memory to hold the final pattern. */ + *replacedp = 1; + GET_SPACE_RET(sp, bp, blen, needlen); + + for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) + switch (*p) { + case '\\': + if (len > 1) { + --len; + switch (*++p) { + case '<': + memcpy(t, + RE_WSTART, sizeof(RE_WSTART) - 1); + t += sizeof(RE_WSTART) - 1; + break; + case '>': + memcpy(t, + RE_WSTOP, sizeof(RE_WSTOP) - 1); + t += sizeof(RE_WSTOP) - 1; + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) + *t++ = '~'; + else { + memcpy(t, + sp->repl, sp->repl_len); + t += sp->repl_len; + } + break; + case '.': + case '[': + case '*': + if (O_ISSET(sp, O_MAGIC)) + *t++ = '\\'; + *t++ = *p; + break; + default: + *t++ = '\\'; + *t++ = *p; + } + } else + *t++ = '\\'; + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) { + memcpy(t, sp->repl, sp->repl_len); + t += sp->repl_len; + } else + *t++ = '~'; + break; + case '.': + case '[': + case '*': + if (!O_ISSET(sp, O_MAGIC)) + *t++ = '\\'; + *t++ = *p; + break; + default: + *t++ = *p; + break; + } + + *ptrnp = bp; + *plenp = t - bp; + return (0); +} + +/* + * re_tag_conv -- + * Convert a tags search path into something that the POSIX + * 1003.2 RE functions can handle. + */ +static int +re_tag_conv(sp, ptrnp, plenp, replacedp) + SCR *sp; + char **ptrnp; + size_t *plenp; + int *replacedp; +{ + size_t blen, len; + int lastdollar; + char *bp, *p, *t; + + len = *plenp; + + /* Max memory usage is 2 times the length of the string. */ + *replacedp = 1; + GET_SPACE_RET(sp, bp, blen, len * 2); + + p = *ptrnp; + t = bp; + + /* If the last character is a '/' or '?', we just strip it. */ + if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) + --len; + + /* If the next-to-last or last character is a '$', it's magic. */ + if (len > 0 && p[len - 1] == '$') { + --len; + lastdollar = 1; + } else + lastdollar = 0; + + /* If the first character is a '/' or '?', we just strip it. */ + if (len > 0 && (p[0] == '/' || p[0] == '?')) { + ++p; + --len; + } + + /* If the first or second character is a '^', it's magic. */ + if (p[0] == '^') { + *t++ = *p++; + --len; + } + + /* + * Escape every other magic character we can find, meanwhile stripping + * the backslashes ctags inserts when escaping the search delimiter + * characters. + */ + for (; len > 0; --len) { + if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { + ++p; + --len; + } else if (strchr("^.[]$*", p[0])) + *t++ = '\\'; + *t++ = *p++; + } + if (lastdollar) + *t++ = '$'; + + *ptrnp = bp; + *plenp = t - bp; + return (0); +} + +/* + * re_cscope_conv -- + * Convert a cscope search path into something that the POSIX + * 1003.2 RE functions can handle. + */ +static int +re_cscope_conv(sp, ptrnp, plenp, replacedp) + SCR *sp; + char **ptrnp; + size_t *plenp; + int *replacedp; +{ + size_t blen, len, nspaces; + char *bp, *p, *t; + + /* + * Each space in the source line printed by cscope represents an + * arbitrary sequence of spaces, tabs, and comments. + */ +#define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" + for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) + if (*p == ' ') + ++nspaces; + + /* + * Allocate plenty of space: + * the string, plus potential escaping characters; + * nspaces + 2 copies of CSCOPE_RE_SPACE; + * ^, $, nul terminator characters. + */ + *replacedp = 1; + len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; + GET_SPACE_RET(sp, bp, blen, len); + + p = *ptrnp; + t = bp; + + *t++ = '^'; + memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); + t += sizeof(CSCOPE_RE_SPACE) - 1; + + for (len = *plenp; len > 0; ++p, --len) + if (*p == ' ') { + memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); + t += sizeof(CSCOPE_RE_SPACE) - 1; + } else { + if (strchr("\\^.[]$*+?()|{}", *p)) + *t++ = '\\'; + *t++ = *p; + } + + memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); + t += sizeof(CSCOPE_RE_SPACE) - 1; + *t++ = '$'; + + *ptrnp = bp; + *plenp = t - bp; + return (0); +} + +/* + * re_error -- + * Report a regular expression error. + * + * PUBLIC: void re_error __P((SCR *, int, regex_t *)); + */ +void +re_error(sp, errcode, preg) + SCR *sp; + int errcode; + regex_t *preg; +{ + size_t s; + char *oe; + + s = regerror(errcode, preg, "", 0); + if ((oe = malloc(s)) == NULL) + msgq(sp, M_SYSERR, NULL); + else { + (void)regerror(errcode, preg, oe, s); + msgq(sp, M_ERR, "RE error: %s", oe); + free(oe); + } +} + +/* + * re_sub -- + * Do the substitution for a regular expression. + */ +static int +re_sub(sp, ip, lbp, lbclenp, lblenp, match) + SCR *sp; + char *ip; /* Input line. */ + char **lbp; + size_t *lbclenp, *lblenp; + regmatch_t match[10]; +{ + enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; + size_t lbclen, lblen; /* Local copies. */ + size_t mlen; /* Match length. */ + size_t rpl; /* Remaining replacement length. */ + char *rp; /* Replacement pointer. */ + int ch; + int no; /* Match replacement offset. */ + char *p, *t; /* Buffer pointers. */ + char *lb; /* Local copies. */ + + lb = *lbp; /* Get local copies. */ + lbclen = *lbclenp; + lblen = *lblenp; + + /* + * QUOTING NOTE: + * + * There are some special sequences that vi provides in the + * replacement patterns. + * & string the RE matched (\& if nomagic set) + * \# n-th regular subexpression + * \E end \U, \L conversion + * \e end \U, \L conversion + * \l convert the next character to lower-case + * \L convert to lower-case, until \E, \e, or end of replacement + * \u convert the next character to upper-case + * \U convert to upper-case, until \E, \e, or end of replacement + * + * Otherwise, since this is the lowest level of replacement, discard + * all escaping characters. This (hopefully) matches historic practice. + */ +#define OUTCH(ch, nltrans) { \ + CHAR_T __ch = (ch); \ + u_int __value = KEY_VAL(sp, __ch); \ + if (nltrans && (__value == K_CR || __value == K_NL)) { \ + NEEDNEWLINE(sp); \ + sp->newl[sp->newl_cnt++] = lbclen; \ + } else if (conv != C_NOTSET) { \ + switch (conv) { \ + case C_ONELOWER: \ + conv = C_NOTSET; \ + /* FALLTHROUGH */ \ + case C_LOWER: \ + if (isupper(__ch)) \ + __ch = tolower(__ch); \ + break; \ + case C_ONEUPPER: \ + conv = C_NOTSET; \ + /* FALLTHROUGH */ \ + case C_UPPER: \ + if (islower(__ch)) \ + __ch = toupper(__ch); \ + break; \ + default: \ + abort(); \ + } \ + } \ + NEEDSP(sp, 1, p); \ + *p++ = __ch; \ + ++lbclen; \ +} + conv = C_NOTSET; + for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { + switch (ch = *rp++) { + case '&': + if (O_ISSET(sp, O_MAGIC)) { + no = 0; + goto subzero; + } + break; + case '\\': + if (rpl == 0) + break; + --rpl; + switch (ch = *rp) { + case '&': + ++rp; + if (!O_ISSET(sp, O_MAGIC)) { + no = 0; + goto subzero; + } + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + no = *rp++ - '0'; +subzero: if (match[no].rm_so == -1 || + match[no].rm_eo == -1) + break; + mlen = match[no].rm_eo - match[no].rm_so; + for (t = ip + match[no].rm_so; mlen--; ++t) + OUTCH(*t, 0); + continue; + case 'e': + case 'E': + ++rp; + conv = C_NOTSET; + continue; + case 'l': + ++rp; + conv = C_ONELOWER; + continue; + case 'L': + ++rp; + conv = C_LOWER; + continue; + case 'u': + ++rp; + conv = C_ONEUPPER; + continue; + case 'U': + ++rp; + conv = C_UPPER; + continue; + default: + ++rp; + break; + } + } + OUTCH(ch, 1); + } + + *lbp = lb; /* Update caller's information. */ + *lbclenp = lbclen; + *lblenp = lblen; + return (0); +} |