aboutsummaryrefslogtreecommitdiff
path: root/ex/ex_subst.c
diff options
context:
space:
mode:
Diffstat (limited to 'ex/ex_subst.c')
-rw-r--r--ex/ex_subst.c1459
1 files changed, 1459 insertions, 0 deletions
diff --git a/ex/ex_subst.c b/ex/ex_subst.c
new file mode 100644
index 000000000000..0ebb81dd58e7
--- /dev/null
+++ b/ex/ex_subst.c
@@ -0,0 +1,1459 @@
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1992, 1993, 1994, 1995, 1996
+ * Keith Bostic. All rights reserved.
+ *
+ * See the LICENSE file for redistribution information.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)ex_subst.c 10.37 (Berkeley) 9/15/96";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+
+#include <bitstring.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../common/common.h"
+#include "../vi/vi.h"
+
+#define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
+#define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
+
+static int re_conv __P((SCR *, char **, size_t *, int *));
+static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
+static int re_sub __P((SCR *,
+ char *, char **, size_t *, size_t *, regmatch_t [10]));
+static int re_tag_conv __P((SCR *, char **, size_t *, int *));
+static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
+
+/*
+ * ex_s --
+ * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
+ *
+ * Substitute on lines matching a pattern.
+ *
+ * Parses the delimiter, the pattern and the replacement out of the first
+ * command argument, compiles the pattern (or reuses the last RE when the
+ * pattern is empty), stores the tilde-expanded replacement in sp->repl,
+ * and hands the text that follows (flags and count) to s(). With no
+ * argument, or an argument holding only blanks, it calls ex_subagain().
+ * If the first non-blank is alphanumeric or a backslash there is no
+ * pattern at all: the text is passed to s() as flags, with SUB_MUSTSETR.
+ *
+ * Returns 1 on error, otherwise whatever s() or ex_subagain() returns.
+ *
+ * PUBLIC: int ex_s __P((SCR *, EXCMD *));
+ */
+int
+ex_s(sp, cmdp)
+ SCR *sp;
+ EXCMD *cmdp;
+{
+ regex_t *re;
+ size_t blen, len;
+ u_int flags;
+ int delim;
+ char *bp, *ptrn, *rep, *p, *t;
+
+ /*
+ * Skip leading white space.
+ *
+ * !!!
+ * Historic vi allowed any non-alphanumeric to serve as the
+ * substitution command delimiter.
+ *
+ * !!!
+ * If the arguments are empty, it's the same as &, i.e. we
+ * repeat the last substitution.
+ */
+ if (cmdp->argc == 0)
+ goto subagain;
+ for (p = cmdp->argv[0]->bp,
+ len = cmdp->argv[0]->len; len > 0; --len, ++p) {
+ if (!isblank(*p))
+ break;
+ }
+ if (len == 0)
+subagain: return (ex_subagain(sp, cmdp));
+
+ delim = *p++;
+ if (isalnum(delim) || delim == '\\') /* Flags only, no pattern. */
+ return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
+
+ /*
+ * !!!
+ * The full-blown substitute command reset the remembered
+ * state of the 'c' and 'g' suffixes.
+ */
+ sp->c_suffix = sp->g_suffix = 0;
+
+ /*
+ * Get the pattern string, toss escaping characters.
+ *
+ * !!!
+ * Historic vi accepted any of the following forms:
+ *
+ * :s/abc/def/ change "abc" to "def"
+ * :s/abc/def change "abc" to "def"
+ * :s/abc/ delete "abc"
+ * :s/abc delete "abc"
+ *
+ * QUOTING NOTE:
+ *
+ * Only toss an escaping character if it escapes a delimiter.
+ * This means that "s/A/\\\\f" replaces "A" with "\\f". It
+ * would be nice to be more regular, i.e. for each layer of
+ * escaping a single escaping character is removed, but that's
+ * not how the historic vi worked.
+ *
+ * The pattern is compacted in place: t is the write position and
+ * p the read position within the same argument buffer.
+ */
+ for (ptrn = t = p;;) {
+ if (p[0] == '\0' || p[0] == delim) {
+ if (p[0] == delim)
+ ++p;
+ /*
+ * !!!
+ * Nul terminate the pattern string -- it's passed
+ * to regcomp which doesn't understand anything else.
+ */
+ *t = '\0';
+ break;
+ }
+ if (p[0] == '\\')
+ if (p[1] == delim) /* Drop the backslash. */
+ ++p;
+ else if (p[1] == '\\') /* Keep both backslashes. */
+ *t++ = *p++;
+ *t++ = *p++;
+ }
+
+ /*
+ * If the pattern string is empty, use the last RE (not just the
+ * last substitution RE).
+ */
+ if (*ptrn == '\0') {
+ if (sp->re == NULL) {
+ ex_emsg(sp, NULL, EXM_NOPREVRE);
+ return (1);
+ }
+
+ /* Re-compile the RE if necessary. */
+ if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
+ sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
+ return (1);
+ flags = 0;
+ } else {
+ /*
+ * !!!
+ * Compile the RE. Historic practice is that substitutes set
+ * the search direction as well as both substitute and search
+ * RE's. We compile the RE twice, as we don't want to bother
+ * ref counting the pattern string and (opaque) structure.
+ */
+ if (re_compile(sp, ptrn, t - ptrn,
+ &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
+ return (1);
+ if (re_compile(sp, ptrn, t - ptrn,
+ &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
+ return (1);
+
+ flags = SUB_FIRST;
+ sp->searchdir = FORWARD;
+ }
+ re = &sp->re_c; /* Either branch leaves the search RE compiled. */
+
+ /*
+ * Get the replacement string.
+ *
+ * The special character & (\& if O_MAGIC not set) matches the
+ * entire RE. No handling of & is required here, it's done by
+ * re_sub().
+ *
+ * The special character ~ (\~ if O_MAGIC not set) inserts the
+ * previous replacement string into this replacement string.
+ * Count ~'s to figure out how much space we need. We could
+ * special case nonexistent last patterns or whether or not
+ * O_MAGIC is set, but it's probably not worth the effort.
+ *
+ * QUOTING NOTE:
+ *
+ * Only toss an escaping character if it escapes a delimiter or
+ * if O_MAGIC is set and it escapes a tilde.
+ *
+ * !!!
+ * If the entire replacement pattern is "%", then use the last
+ * replacement pattern. This semantic was added to vi in System
+ * V and then percolated elsewhere, presumably around the time
+ * that it was added to their version of ed(1).
+ */
+ if (p[0] == '\0' || p[0] == delim) {
+ if (p[0] == delim)
+ ++p;
+ if (sp->repl != NULL)
+ free(sp->repl);
+ sp->repl = NULL;
+ sp->repl_len = 0;
+ } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
+ p += p[1] == delim ? 2 : 1; /* sp->repl is left as it was. */
+ else {
+ for (rep = p, len = 0;
+ p[0] != '\0' && p[0] != delim; ++p, ++len)
+ if (p[0] == '~')
+ len += sp->repl_len;
+ GET_SPACE_RET(sp, bp, blen, len); /* len is an upper bound. */
+ for (t = bp, len = 0, p = rep;;) {
+ if (p[0] == '\0' || p[0] == delim) {
+ if (p[0] == delim)
+ ++p;
+ break;
+ }
+ if (p[0] == '\\') {
+ if (p[1] == delim)
+ ++p;
+ else if (p[1] == '\\') {
+ *t++ = *p++;
+ ++len;
+ } else if (p[1] == '~') {
+ ++p;
+ if (!O_ISSET(sp, O_MAGIC))
+ goto tilde;
+ }
+ } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
+tilde: ++p;
+ memcpy(t, sp->repl, sp->repl_len);
+ t += sp->repl_len;
+ len += sp->repl_len;
+ continue;
+ }
+ *t++ = *p++;
+ ++len;
+ }
+ if ((sp->repl_len = len) != 0) {
+ if (sp->repl != NULL)
+ free(sp->repl);
+ if ((sp->repl = malloc(len)) == NULL) {
+ msgq(sp, M_SYSERR, NULL);
+ FREE_SPACE(sp, bp, blen);
+ return (1);
+ }
+ memcpy(sp->repl, bp, len);
+ }
+ FREE_SPACE(sp, bp, blen);
+ }
+ return (s(sp, cmdp, p, re, flags));
+}
+
+/*
+ * ex_subagain --
+ *	[line [,line]] & [cgr] [count] [#lp]]
+ *
+ *	Repeat the previous substitute: same substitute RE, same replacement
+ *	text.  Reports EXM_NOPREVRE and fails if no substitute RE has been
+ *	saved.  Returns 1 on error, otherwise the result of s().
+ *
+ * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
+ */
+int
+ex_subagain(sp, cmdp)
+	SCR *sp;
+	EXCMD *cmdp;
+{
+	char *args;
+
+	/* There has to be a remembered substitute pattern. */
+	if (sp->subre == NULL) {
+		ex_emsg(sp, NULL, EXM_NOPREVRE);
+		return (1);
+	}
+
+	/* The saved pattern may have to be compiled again. */
+	if (!F_ISSET(sp, SC_RE_SUBST)) {
+		if (re_compile(sp, sp->subre, sp->subre_len,
+		    NULL, NULL, &sp->subre_c, RE_C_SUBST))
+			return (1);
+	}
+
+	/* Whatever follows the command name is flags and count. */
+	args = cmdp->argc ? cmdp->argv[0]->bp : NULL;
+	return (s(sp, cmdp, args, &sp->subre_c, 0));
+}
+
+/*
+ * ex_subtilde --
+ *	[line [,line]] ~ [cgr] [count] [#lp]]
+ *
+ *	Substitute again, matching with the last RE of any kind (not just
+ *	the last substitute RE) and reusing the last replacement text.
+ *	Reports EXM_NOPREVRE and fails if no RE has been saved.  Returns 1
+ *	on error, otherwise the result of s().
+ *
+ * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
+ */
+int
+ex_subtilde(sp, cmdp)
+	SCR *sp;
+	EXCMD *cmdp;
+{
+	char *args;
+
+	/* There has to be a remembered pattern. */
+	if (sp->re == NULL) {
+		ex_emsg(sp, NULL, EXM_NOPREVRE);
+		return (1);
+	}
+
+	/* The saved pattern may have to be compiled again. */
+	if (!F_ISSET(sp, SC_RE_SEARCH)) {
+		if (re_compile(sp, sp->re, sp->re_len,
+		    NULL, NULL, &sp->re_c, RE_C_SEARCH))
+			return (1);
+	}
+
+	/* Whatever follows the command name is flags and count. */
+	args = cmdp->argc ? cmdp->argv[0]->bp : NULL;
+	return (s(sp, cmdp, args, &sp->re_c, 0));
+}
+
+/*
+ * s --
+ * Do the substitution. This stuff is *really* tricky. There are lots of
+ * special cases, and general nastiness. Don't mess with it unless you're
+ * pretty confident.
+ *
+ * The nasty part of the substitution is what happens when the replacement
+ * string contains newlines. It's a bit tricky -- consider the information
+ * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
+ * to build a set of newline offsets which we use to break the line up later,
+ * when the replacement is done. Don't change it unless you're *damned*
+ * confident.
+ *
+ * The three macros that follow grow buffers on demand. NEEDNEWLINE makes
+ * room for one more entry in sp->newl. BUILD appends len bytes starting
+ * at l to the build buffer. NEEDSP makes sure len more bytes fit in the
+ * build buffer and, if the buffer was reallocated, re-bases pnt to the
+ * current end of the built text. BUILD and NEEDSP use the lb, lbclen and
+ * lblen variables of the function they are expanded in. All three execute
+ * "return (1)" in the enclosing function if the buffer pointer is NULL
+ * after REALLOC. They expand to brace blocks, not single statements.
+ */
+#define NEEDNEWLINE(sp) { \
+ if (sp->newl_len == sp->newl_cnt) { /* Offset array is full. */ \
+ sp->newl_len += 25; \
+ REALLOC(sp, sp->newl, size_t *, \
+ sp->newl_len * sizeof(size_t)); \
+ if (sp->newl == NULL) { \
+ sp->newl_len = 0; \
+ return (1); \
+ } \
+ } \
+}
+
+#define BUILD(sp, l, len) { \
+ if (lbclen + (len) > lblen) { \
+ lblen += MAX(lbclen + (len), 256); /* Grow by >= 256. */ \
+ REALLOC(sp, lb, char *, lblen); \
+ if (lb == NULL) { \
+ lbclen = 0; \
+ return (1); \
+ } \
+ } \
+ memcpy(lb + lbclen, l, len); \
+ lbclen += len; \
+}
+
+#define NEEDSP(sp, len, pnt) { \
+ if (lbclen + (len) > lblen) { \
+ lblen += MAX(lbclen + (len), 256); /* Grow by >= 256. */ \
+ REALLOC(sp, lb, char *, lblen); \
+ if (lb == NULL) { \
+ lbclen = 0; \
+ return (1); \
+ } \
+ pnt = lb + lbclen; /* lb may have moved. */ \
+ } \
+}
+
+static int
+s(sp, cmdp, s, re, flags)
+ SCR *sp;
+ EXCMD *cmdp; /* Supplies the addr1/addr2 line range and flagoff. */
+ char *s; /* Flag/count text after the replacement, or NULL. */
+ regex_t *re; /* Compiled RE; the 'r' flag switches to sp->re_c. */
+ u_int flags; /* SUB_FIRST and/or SUB_MUSTSETR, or 0. */
+{
+ EVENT ev;
+ MARK from, to;
+ TEXTH tiq;
+ recno_t elno, lno, slno;
+ regmatch_t match[10];
+ size_t blen, cnt, last, lbclen, lblen, len, llen;
+ size_t offset, saved_offset, scno;
+ int cflag, lflag, nflag, pflag, rflag;
+ int didsub, do_eol_match, eflags, empty_ok, eval;
+ int linechanged, matched, quit, rval;
+ char *bp, *lb;
+
+ NEEDFILE(sp, cmdp);
+
+ slno = sp->lno; /* Starting cursor, compared again at the end. */
+ scno = sp->cno;
+
+ /*
+ * !!!
+ * Historically, the 'g' and 'c' suffixes were always toggled as flags,
+ * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
+ * not set, they were initialized to 0 for all substitute commands. If
+ * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
+ * specified substitute/replacement patterns (see ex_s()).
+ */
+ if (!O_ISSET(sp, O_EDCOMPATIBLE))
+ sp->c_suffix = sp->g_suffix = 0;
+
+ /*
+ * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
+ * it only displayed the last change. I'd disallow them, but they are
+ * useful in combination with the [v]global commands. In the current
+ * model the problem is combining them with the 'c' flag -- the screen
+ * would have to flip back and forth between the confirm screen and the
+ * ex print screen, which would be pretty awful. We do display all
+ * changes, though, for what that's worth.
+ *
+ * !!!
+ * Historic vi was fairly strict about the order of "options", the
+ * count, and "flags". I'm somewhat fuzzy on the difference between
+ * options and flags, anyway, so this is a simpler approach, and we
+ * just take them in whatever order the user gives them. (The ex
+ * usage statement doesn't reflect this.)
+ */
+ cflag = lflag = nflag = pflag = rflag = 0;
+ if (s == NULL)
+ goto noargs;
+ for (lno = OOBLNO; *s != '\0'; ++s)
+ switch (*s) {
+ case ' ':
+ case '\t':
+ continue;
+ case '+':
+ ++cmdp->flagoff;
+ break;
+ case '-':
+ --cmdp->flagoff;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (lno != OOBLNO) /* Only one count is allowed. */
+ goto usage;
+ errno = 0;
+ lno = strtoul(s, &s, 10);
+ if (*s == '\0') /* Loop increment correction. */
+ --s;
+ if (errno == ERANGE) {
+ if (lno == LONG_MAX) /* NOTE(review): strtoul overflows to ULONG_MAX -- confirm. */
+ msgq(sp, M_ERR, "153|Count overflow");
+ else if (lno == LONG_MIN)
+ msgq(sp, M_ERR, "154|Count underflow");
+ else
+ msgq(sp, M_SYSERR, NULL);
+ return (1);
+ }
+ /*
+ * In historic vi, the count was inclusive from the
+ * second address.
+ *
+ * If the resulting last line doesn't exist, db_last()
+ * is asked to store a line number in its place.
+ */
+ cmdp->addr1.lno = cmdp->addr2.lno;
+ cmdp->addr2.lno += lno - 1;
+ if (!db_exist(sp, cmdp->addr2.lno) &&
+ db_last(sp, &cmdp->addr2.lno))
+ return (1);
+ break;
+ case '#':
+ nflag = 1;
+ break;
+ case 'c':
+ sp->c_suffix = !sp->c_suffix;
+
+ /*
+ * Ex text structure initialization; tiq is handed to ex_txt() below.
+ * NOTE(review): this is the only place tiq is set up -- confirm
+ * c_suffix can't already be on without a 'c' flag being parsed.
+ */
+ if (F_ISSET(sp, SC_EX)) {
+ memset(&tiq, 0, sizeof(TEXTH));
+ CIRCLEQ_INIT(&tiq);
+ }
+ break;
+ case 'g':
+ sp->g_suffix = !sp->g_suffix;
+ break;
+ case 'l':
+ lflag = 1;
+ break;
+ case 'p':
+ pflag = 1;
+ break;
+ case 'r':
+ if (LF_ISSET(SUB_FIRST)) {
+ msgq(sp, M_ERR,
+ "155|Regular expression specified; r flag meaningless");
+ return (1);
+ }
+ if (!F_ISSET(sp, SC_RE_SEARCH)) {
+ ex_emsg(sp, NULL, EXM_NOPREVRE);
+ return (1);
+ }
+ rflag = 1;
+ re = &sp->re_c;
+ break;
+ default:
+ goto usage;
+ }
+
+ if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
+usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
+ return (1);
+ }
+
+noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
+ msgq(sp, M_ERR,
+"156|The #, l and p flags may not be combined with the c flag in vi mode");
+ return (1);
+ }
+
+ /*
+ * bp: if interactive, line cache
+ * blen: if interactive, line cache length
+ * lb: build buffer pointer.
+ * lbclen: current length of built buffer.
+ * lblen; length of build buffer.
+ */
+ bp = lb = NULL;
+ blen = lbclen = lblen = 0;
+
+ /* For each line... */
+ for (matched = quit = 0, lno = cmdp->addr1.lno,
+ elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
+
+ /* Someone's unhappy, time to stop. */
+ if (INTERRUPTED(sp))
+ break;
+
+ /* Get the line. */
+ if (db_get(sp, lno, DBG_FATAL, &s, &llen))
+ goto err;
+
+ /*
+ * Make a local copy if doing confirmation -- when calling
+ * the confirm routine we're likely to lose the cached copy.
+ */
+ if (sp->c_suffix) {
+ if (bp == NULL) {
+ GET_SPACE_RET(sp, bp, blen, llen);
+ } else
+ ADD_SPACE_RET(sp, bp, blen, llen);
+ memcpy(bp, s, llen);
+ s = bp;
+ }
+
+ /* Start searching from the beginning. */
+ offset = 0;
+ len = llen;
+
+ /* Reset the build buffer offset. */
+ lbclen = 0;
+
+ /* Reset empty match flag. */
+ empty_ok = 1;
+
+ /*
+ * We don't want to have to do a setline if the line didn't
+ * change -- keep track of whether or not this line changed.
+ * If doing confirmations, don't want to keep setting the
+ * line if change is refused -- keep track of substitutions.
+ */
+ didsub = linechanged = 0;
+
+ /* New line, do an EOL match. */
+ do_eol_match = 1;
+
+ /* It's not nul terminated, but we pretend it is. */
+ eflags = REG_STARTEND;
+
+ /*
+ * The search area is from s + offset to the EOL.
+ *
+ * Generally, match[0].rm_so is the offset of the start
+ * of the match from the start of the search, and offset
+ * is the offset of the start of the last search.
+ */
+nextmatch: match[0].rm_so = 0;
+ match[0].rm_eo = len;
+
+ /* Get the next match. */
+ eval = regexec(re, (char *)s + offset, 10, match, eflags);
+
+ /*
+ * There wasn't a match or if there was an error, deal with
+ * it. If there was a previous match in this line, resolve
+ * the changes into the database. Otherwise, just move on.
+ */
+ if (eval == REG_NOMATCH)
+ goto endmatch;
+ if (eval != 0) {
+ re_error(sp, eval, re);
+ goto err;
+ }
+ matched = 1;
+
+ /* Only the first search can match an anchored expression. */
+ eflags |= REG_NOTBOL;
+
+ /*
+ * !!!
+ * It's possible to match 0-length strings -- for example, the
+ * command s;a*;X;, when matched against the string "aabb" will
+ * result in "XbXbX", i.e. the matches are "aa", the space
+ * between the b's and the space between the b's and the end of
+ * the string. There is a similar space between the beginning
+ * of the string and the a's. The rule that we use (because vi
+ * historically used it) is that any 0-length match, occurring
+ * immediately after a match, is ignored. Otherwise, the above
+ * example would have resulted in "XXbXbX". Another example is
+ * incorrectly using " *" to replace groups of spaces with one
+ * space.
+ *
+ * The way we do this is that if we just had a successful match,
+ * the starting offset does not skip characters, and the match
+ * is empty, ignore the match and move forward. If there's no
+ * more characters in the string, we were attempting to match
+ * after the last character, so quit.
+ */
+ if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
+ empty_ok = 1;
+ if (len == 0)
+ goto endmatch;
+ BUILD(sp, s + offset, 1)
+ ++offset;
+ --len;
+ goto nextmatch;
+ }
+
+ /* Confirm change. */
+ if (sp->c_suffix) {
+ /*
+ * Set the cursor position for confirmation. Note,
+ * if we matched on a '$', the cursor may be past
+ * the end of line.
+ */
+ from.lno = to.lno = lno;
+ from.cno = match[0].rm_so + offset;
+ to.cno = match[0].rm_eo + offset;
+ /*
+ * Both ex and vi have to correct for a change before
+ * the first character in the line.
+ */
+ if (llen == 0)
+ from.cno = to.cno = 0;
+ if (F_ISSET(sp, SC_VI)) {
+ /*
+ * Only vi has to correct for a change after
+ * the last character in the line.
+ *
+ * XXX
+ * It would be nice to change the vi code so
+ * that we could display a cursor past EOL.
+ */
+ if (to.cno >= llen)
+ to.cno = llen - 1;
+ if (from.cno >= llen)
+ from.cno = llen - 1;
+
+ sp->lno = from.lno;
+ sp->cno = from.cno;
+ if (vs_refresh(sp, 1))
+ goto err;
+
+ vs_update(sp, msg_cat(sp,
+ "169|Confirm change? [n]", NULL), NULL);
+
+ if (v_event_get(sp, &ev, 0, 0))
+ goto err;
+ switch (ev.e_event) {
+ case E_CHARACTER:
+ break;
+ case E_EOF:
+ case E_ERR:
+ case E_INTERRUPT:
+ goto lquit;
+ default:
+ v_event_err(sp, &ev);
+ goto lquit;
+ }
+ } else {
+ if (ex_print(sp, cmdp, &from, &to, 0) ||
+ ex_scprint(sp, &from, &to))
+ goto lquit;
+ if (ex_txt(sp, &tiq, 0, TXT_CR))
+ goto err;
+ ev.e_c = tiq.cqh_first->lb[0]; /* First character typed. */
+ }
+
+ switch (ev.e_c) {
+ case CH_YES:
+ break;
+ default:
+ case CH_NO:
+ didsub = 0;
+ BUILD(sp, s +offset, match[0].rm_eo); /* Keep the text as is. */
+ goto skip;
+ case CH_QUIT:
+ /* Set the quit/interrupted flags. */
+lquit: quit = 1;
+ F_SET(sp->gp, G_INTERRUPTED);
+
+ /*
+ * Resolve any changes, then return to (and
+ * exit from) the main loop.
+ */
+ goto endmatch;
+ }
+ }
+
+ /*
+ * Set the cursor to the last position changed, converting
+ * from 1-based to 0-based.
+ *
+ * NOTE(review): match[0].rm_so is relative to s + offset and
+ * offset isn't added here, unlike from.cno above -- confirm
+ * that's intended.
+ */
+ sp->lno = lno;
+ sp->cno = match[0].rm_so;
+
+ /* Copy the bytes before the match into the build buffer. */
+ BUILD(sp, s + offset, match[0].rm_so);
+
+ /* Substitute the matching bytes. */
+ didsub = 1;
+ if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
+ goto err;
+
+ /* Set the change flag so we know this line was modified. */
+ linechanged = 1;
+
+ /* Move past the matched bytes. */
+skip: offset += match[0].rm_eo;
+ len -= match[0].rm_eo;
+
+ /* A match cannot be followed by an empty pattern. */
+ empty_ok = 0;
+
+ /*
+ * If doing a global change with confirmation, we have to
+ * update the screen. The basic idea is to store the line
+ * so the screen update routines can find it, and restart.
+ */
+ if (didsub && sp->c_suffix && sp->g_suffix) {
+ /*
+ * The new search offset will be the end of the
+ * modified line.
+ */
+ saved_offset = lbclen;
+
+ /* Copy the rest of the line. */
+ if (len)
+ BUILD(sp, s + offset, len)
+
+ /* Set the new offset. */
+ offset = saved_offset;
+
+ /* Store inserted lines, adjusting the build buffer. */
+ last = 0;
+ if (sp->newl_cnt) {
+ for (cnt = 0;
+ cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
+ if (db_insert(sp, lno,
+ lb + last, sp->newl[cnt] - last))
+ goto err;
+ last = sp->newl[cnt] + 1; /* Skip the newline byte. */
+ ++sp->rptlines[L_ADDED];
+ }
+ lbclen -= last;
+ offset -= last;
+ sp->newl_cnt = 0;
+ }
+
+ /* Store and retrieve the line. */
+ if (db_set(sp, lno, lb + last, lbclen))
+ goto err;
+ if (db_get(sp, lno, DBG_FATAL, &s, &llen))
+ goto err;
+ ADD_SPACE_RET(sp, bp, blen, llen)
+ memcpy(bp, s, llen);
+ s = bp;
+ len = llen - offset;
+
+ /* Restart the build. */
+ lbclen = 0;
+ BUILD(sp, s, offset);
+
+ /*
+ * If we haven't already done the after-the-string
+ * match, do one. Set REG_NOTEOL so the '$' pattern
+ * only matches once.
+ */
+ if (!do_eol_match)
+ goto endmatch;
+ if (offset == len) {
+ do_eol_match = 0;
+ eflags |= REG_NOTEOL;
+ }
+ goto nextmatch;
+ }
+
+ /*
+ * If it's a global:
+ *
+ * If at the end of the string, do a test for the after
+ * the string match. Set REG_NOTEOL so the '$' pattern
+ * only matches once.
+ */
+ if (sp->g_suffix && do_eol_match) {
+ if (len == 0) {
+ do_eol_match = 0;
+ eflags |= REG_NOTEOL;
+ }
+ goto nextmatch;
+ }
+
+endmatch: if (!linechanged)
+ continue;
+
+ /* Copy any remaining bytes into the build buffer. */
+ if (len)
+ BUILD(sp, s + offset, len)
+
+ /* Store inserted lines, adjusting the build buffer. */
+ last = 0;
+ if (sp->newl_cnt) {
+ for (cnt = 0;
+ cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
+ if (db_insert(sp,
+ lno, lb + last, sp->newl[cnt] - last))
+ goto err;
+ last = sp->newl[cnt] + 1; /* Skip the newline byte. */
+ ++sp->rptlines[L_ADDED];
+ }
+ lbclen -= last;
+ sp->newl_cnt = 0;
+ }
+
+ /* Store the changed line. */
+ if (db_set(sp, lno, lb + last, lbclen))
+ goto err;
+
+ /* Update changed line counter. */
+ if (sp->rptlchange != lno) {
+ sp->rptlchange = lno;
+ ++sp->rptlines[L_CHANGED];
+ }
+
+ /*
+ * !!!
+ * Display as necessary. Historic practice is to only
+ * display the last line of a line split into multiple
+ * lines.
+ */
+ if (lflag || nflag || pflag) {
+ from.lno = to.lno = lno;
+ from.cno = to.cno = 0;
+ if (lflag)
+ (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
+ if (nflag)
+ (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
+ if (pflag)
+ (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
+ }
+ }
+
+ /*
+ * !!!
+ * Historically, vi attempted to leave the cursor at the same place if
+ * the substitution was done at the current cursor position. Otherwise
+ * it moved it to the first non-blank of the last line changed. There
+ * were some problems: for example, :s/$/foo/ with the cursor on the
+ * last character of the line left the cursor on the last character, or
+ * the & command with multiple occurrences of the matching string in the
+ * line usually left the cursor in a fairly random position.
+ *
+ * We try to do the same thing, with the exception that if the user is
+ * doing substitution with confirmation, we move to the last line about
+ * which the user was consulted, as opposed to the last line that they
+ * actually changed. This prevents a screen flash if the user doesn't
+ * change many of the possible lines.
+ */
+ if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
+ sp->cno = 0;
+ (void)nonblank(sp, sp->lno, &sp->cno);
+ }
+
+ /*
+ * If not in a global command, and nothing matched, say so.
+ * Else, if none of the lines displayed, put something up.
+ */
+ rval = 0;
+ if (!matched) {
+ if (!F_ISSET(sp, SC_EX_GLOBAL)) {
+ msgq(sp, M_ERR, "157|No match found");
+ goto err;
+ }
+ } else if (!lflag && !nflag && !pflag)
+ F_SET(cmdp, E_AUTOPRINT);
+
+ if (0) { /* Only entered through the err label. */
+err: rval = 1;
+ }
+
+ if (bp != NULL)
+ FREE_SPACE(sp, bp, blen);
+ if (lb != NULL)
+ free(lb);
+ return (rval); /* 0 on success, 1 on error. */
+}
+
+/*
+ * re_compile --
+ *	Compile a pattern into *rep.
+ *
+ *	When ptrnp is non-NULL the pattern is new: it is first converted
+ *	(cscope, tag or ordinary vi syntax, chosen by flags), a
+ *	nul-terminated copy is stored in *ptrnp (any previous value is
+ *	freed) and, if lenp is non-NULL, its length in *lenp.  When ptrnp
+ *	is NULL, ptrn is handed to regcomp() unchanged.
+ *
+ *	RE_C_SEARCH and RE_C_SUBST release the matching previously compiled
+ *	RE held in sp and set the SC_RE_SEARCH/SC_RE_SUBST flag on success.
+ *	RE_C_SILENT suppresses the regcomp() error message.
+ *
+ *	Returns 0 on success, 1 on failure.
+ *
+ * PUBLIC: int re_compile __P((SCR *,
+ * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
+ */
+int
+re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
+	SCR *sp;
+	char *ptrn, **ptrnp;
+	size_t plen, *lenp;
+	regex_t *rep;
+	u_int flags;
+{
+	size_t idx;
+	int cflags, converted, rc, status;
+	char *copy;
+
+	/* Work out the regcomp() flags from the edit options. */
+	cflags = 0;
+	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
+		if (O_ISSET(sp, O_EXTENDED))
+			cflags |= REG_EXTENDED;
+		if (O_ISSET(sp, O_IGNORECASE))
+			cflags |= REG_ICASE;
+		if (O_ISSET(sp, O_ICLOWER)) {
+			/* Ignore case only if there's no upper-case. */
+			idx = 0;
+			while (idx < plen && !isupper(ptrn[idx]))
+				++idx;
+			if (idx == plen)
+				cflags |= REG_ICASE;
+		}
+	}
+
+	/* Release whichever saved compiled RE is about to be replaced. */
+	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
+		regfree(&sp->re_c);
+		F_CLR(sp, SC_RE_SEARCH);
+	}
+	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
+		regfree(&sp->subre_c);
+		F_CLR(sp, SC_RE_SUBST);
+	}
+
+	/*
+	 * A pattern that is being saved hasn't been seen before: translate
+	 * it to POSIX 1003.2 syntax and keep a copy so that it can be
+	 * compiled again later.
+	 */
+	if (ptrnp != NULL) {
+		if (LF_ISSET(RE_C_CSCOPE)) {
+			status = re_cscope_conv(sp, &ptrn, &plen, &converted);
+			/*
+			 * XXX
+			 * The match-any-<blank> expression produced by
+			 * re_cscope_conv() currently needs extended RE's.
+			 * This may not be right or safe.
+			 */
+			if (status == 0)
+				cflags |= REG_EXTENDED;
+		} else if (LF_ISSET(RE_C_TAG))
+			status = re_tag_conv(sp, &ptrn, &plen, &converted);
+		else
+			status = re_conv(sp, &ptrn, &plen, &converted);
+		if (status)
+			return (1);
+
+		/* Throw away the pattern saved previously. */
+		if (*ptrnp != NULL) {
+			free(*ptrnp);
+			*ptrnp = NULL;
+		}
+		if (lenp != NULL)
+			*lenp = plen;
+
+		/*
+		 * XXX
+		 * Regcomp isn't 8-bit clean, so the saved copy is
+		 * nul-terminated for now.  There's just no other solution.
+		 */
+		MALLOC(sp, copy, char *, plen + 1);
+		if (copy != NULL) {
+			memcpy(copy, ptrn, plen);
+			copy[plen] = '\0';
+		}
+		*ptrnp = copy;
+
+		/* Give back the buffer the conversion routine allocated. */
+		if (converted)
+			FREE_SPACE(sp, ptrn, 0);
+
+		if (copy == NULL)
+			return (1);
+
+		ptrn = copy;
+	}
+
+	/*
+	 * XXX
+	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
+	 * contained a nul.  Bummer!
+	 */
+	rc = regcomp(rep, ptrn, /* plen, */ cflags);
+	if (rc != 0) {
+		if (!LF_ISSET(RE_C_SILENT))
+			re_error(sp, rc, rep);
+		return (1);
+	}
+
+	/* Remember which saved RE's are now compiled. */
+	if (LF_ISSET(RE_C_SEARCH))
+		F_SET(sp, SC_RE_SEARCH);
+	if (LF_ISSET(RE_C_SUBST))
+		F_SET(sp, SC_RE_SUBST);
+
+	return (0);
+}
+
+/*
+ * re_conv --
+ *	Convert vi's regular expressions into something that the
+ *	POSIX 1003.2 RE functions can handle.
+ *
+ * There are three conversions we make to make vi's RE's (specifically
+ * the global, search, and substitute patterns) work with POSIX RE's.
+ *
+ * 1: If O_MAGIC is not set, strip backslashes from the magic character
+ *    set (.[*~) that have them, and add them to the ones that don't.
+ * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
+ *    from the last substitute command's replacement string.  If O_MAGIC
+ *    is set, it's the string "~".
+ * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
+ *    new RE escapes.
+ *
+ * On entry *ptrnp / *plenp describe the pattern.  If nothing has to be
+ * converted, *replacedp is set to 0 and the pattern is left alone.
+ * Otherwise *replacedp is set to 1 and *ptrnp / *plenp are replaced by a
+ * buffer obtained with GET_SPACE_RET and its length; the buffer is not
+ * nul-terminated and the caller releases it (re_compile() uses
+ * FREE_SPACE).  Returns 0 on success; GET_SPACE_RET returns from this
+ * function on allocation failure.
+ *
+ * !!!/XXX
+ * This doesn't exactly match the historic behavior of vi because we do
+ * the ~ substitution before calling the RE engine, so magic characters
+ * in the replacement string will be expanded by the RE engine, and they
+ * weren't historically.  It's a bug.
+ */
+static int
+re_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, len, needlen;
+	int magic;
+	char *bp, *p, *t;
+
+	/*
+	 * First pass through, we figure out how much space we'll need.
+	 * We do it in two passes, on the grounds that most of the time
+	 * the user is doing a search and won't have magic characters.
+	 * That way we can skip most of the memory allocation and copies.
+	 *
+	 * Every case here must add at least as many bytes as the matching
+	 * case in the second pass writes, whether or not that case is one
+	 * that sets "magic": once any part of the pattern forces a
+	 * conversion, the whole pattern is copied into the new buffer.
+	 */
+	magic = 0;
+	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
+		switch (*p) {
+		case '\\':
+			if (len > 1) {
+				--len;
+				switch (*++p) {
+				case '<':
+					magic = 1;
+					needlen += sizeof(RE_WSTART);
+					break;
+				case '>':
+					magic = 1;
+					needlen += sizeof(RE_WSTOP);
+					break;
+				case '~':
+					if (O_ISSET(sp, O_MAGIC))
+						needlen += 1;	/* "~" */
+					else {
+						magic = 1;
+						needlen += sp->repl_len;
+					}
+					break;
+				case '.':
+				case '[':
+				case '*':
+					if (O_ISSET(sp, O_MAGIC))
+						needlen += 2;	/* "\c" */
+					else {
+						magic = 1;
+						needlen += 1;	/* "c" */
+					}
+					break;
+				default:
+					needlen += 2;
+					break;
+				}
+			} else
+				needlen += 1;		/* Trailing "\" */
+			break;
+		case '~':
+			if (O_ISSET(sp, O_MAGIC)) {
+				magic = 1;
+				needlen += sp->repl_len;
+			} else
+				needlen += 1;		/* "~" */
+			break;
+		case '.':
+		case '[':
+		case '*':
+			if (!O_ISSET(sp, O_MAGIC)) {
+				magic = 1;
+				needlen += 2;		/* "\c" */
+			} else
+				needlen += 1;		/* "c" */
+			break;
+		default:
+			needlen += 1;
+			break;
+		}
+
+	if (!magic) {
+		*replacedp = 0;
+		return (0);
+	}
+
+	/* Get enough memory to hold the final pattern. */
+	*replacedp = 1;
+	GET_SPACE_RET(sp, bp, blen, needlen);
+
+	/* Second pass: write the converted pattern. */
+	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
+		switch (*p) {
+		case '\\':
+			if (len > 1) {
+				--len;
+				switch (*++p) {
+				case '<':
+					memcpy(t,
+					    RE_WSTART, sizeof(RE_WSTART) - 1);
+					t += sizeof(RE_WSTART) - 1;
+					break;
+				case '>':
+					memcpy(t,
+					    RE_WSTOP, sizeof(RE_WSTOP) - 1);
+					t += sizeof(RE_WSTOP) - 1;
+					break;
+				case '~':
+					if (O_ISSET(sp, O_MAGIC))
+						*t++ = '~';
+					else {
+						memcpy(t,
+						    sp->repl, sp->repl_len);
+						t += sp->repl_len;
+					}
+					break;
+				case '.':
+				case '[':
+				case '*':
+					if (O_ISSET(sp, O_MAGIC))
+						*t++ = '\\';
+					*t++ = *p;
+					break;
+				default:
+					*t++ = '\\';
+					*t++ = *p;
+				}
+			} else
+				*t++ = '\\';
+			break;
+		case '~':
+			if (O_ISSET(sp, O_MAGIC)) {
+				memcpy(t, sp->repl, sp->repl_len);
+				t += sp->repl_len;
+			} else
+				*t++ = '~';
+			break;
+		case '.':
+		case '[':
+		case '*':
+			if (!O_ISSET(sp, O_MAGIC))
+				*t++ = '\\';
+			*t++ = *p;
+			break;
+		default:
+			*t++ = *p;
+			break;
+		}
+
+	*ptrnp = bp;
+	*plenp = t - bp;
+	return (0);
+}
+
+/*
+ * re_tag_conv --
+ *	Convert a tags search path into something that the POSIX
+ *	1003.2 RE functions can handle.
+ *
+ * A leading and a trailing '/' or '?' delimiter are stripped, a leading
+ * '^' and a trailing '$' are kept as anchors, the backslash in "\/" and
+ * "\?" is dropped, and each of ^ . [ ] $ * elsewhere is escaped with a
+ * backslash.
+ *
+ * Always sets *replacedp to 1 and replaces *ptrnp / *plenp with a buffer
+ * obtained with GET_SPACE_RET and its length; the buffer is not
+ * nul-terminated and the caller releases it (re_compile() uses
+ * FREE_SPACE).  Returns 0 on success; GET_SPACE_RET returns from this
+ * function on allocation failure.
+ */
+static int
+re_tag_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, len;
+	int lastdollar;
+	char *bp, *p, *t;
+
+	len = *plenp;
+
+	/* Max memory usage is 2 times the length of the string. */
+	*replacedp = 1;
+	GET_SPACE_RET(sp, bp, blen, len * 2);
+
+	p = *ptrnp;
+	t = bp;
+
+	/* If the last character is a '/' or '?', we just strip it. */
+	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
+		--len;
+
+	/* If the next-to-last or last character is a '$', it's magic. */
+	if (len > 0 && p[len - 1] == '$') {
+		--len;
+		lastdollar = 1;
+	} else
+		lastdollar = 0;
+
+	/* If the first character is a '/' or '?', we just strip it. */
+	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
+		++p;
+		--len;
+	}
+
+	/*
+	 * If the first or second character is a '^', it's magic.  Only look
+	 * if there's something left: with nothing left p[0] isn't part of
+	 * the pattern, and decrementing len would wrap it around.
+	 */
+	if (len > 0 && p[0] == '^') {
+		*t++ = *p++;
+		--len;
+	}
+
+	/*
+	 * Escape every other magic character we can find, meanwhile stripping
+	 * the backslashes ctags inserts when escaping the search delimiter
+	 * characters.
+	 *
+	 * A backslash only counts as an escape if the character after it is
+	 * still inside the pattern.  Otherwise, for input such as "/foo\/",
+	 * the delimiter stripped above would be taken for an escaped one,
+	 * len would be decremented twice from 1, wrap around, and the loop
+	 * would run off the end of both buffers.
+	 */
+	for (; len > 0; --len) {
+		if (len > 1 &&
+		    p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
+			++p;
+			--len;
+		} else if (strchr("^.[]$*", p[0]))
+			*t++ = '\\';
+		*t++ = *p++;
+	}
+	if (lastdollar)
+		*t++ = '$';
+
+	*ptrnp = bp;
+	*plenp = t - bp;
+	return (0);
+}
+
+/*
+ * re_cscope_conv --
+ *	Turn a source line as printed by cscope into a POSIX 1003.2
+ *	regular expression matching that line.
+ *
+ * The result is anchored with ^ and $, every space in the input becomes
+ * CSCOPE_RE_SPACE, one more CSCOPE_RE_SPACE is placed at each end, and
+ * RE special characters in the input are escaped with a backslash.
+ *
+ * Always sets *replacedp to 1 and replaces *ptrnp / *plenp with a buffer
+ * obtained with GET_SPACE_RET and its length.  Returns 0.
+ */
+static int
+re_cscope_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, i, need, nspaces, srclen;
+	char *bp, *dst, *src;
+
+	/*
+	 * Each space in the source line printed by cscope represents an
+	 * arbitrary sequence of spaces, tabs, and comments.
+	 */
+#define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
+	src = *ptrnp;
+	srclen = *plenp;
+
+	/* Count the spaces, each one expands to CSCOPE_RE_SPACE. */
+	nspaces = 0;
+	for (i = 0; i < srclen; ++i)
+		if (src[i] == ' ')
+			++nspaces;
+
+	/*
+	 * Allocate plenty of space:
+	 *	the string, plus potential escaping characters;
+	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
+	 *	^, $, nul terminator characters.
+	 */
+	*replacedp = 1;
+	need = srclen * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
+	GET_SPACE_RET(sp, bp, blen, need);
+
+	dst = bp;
+
+	/* Anchor at the start, then allow leading white space. */
+	*dst++ = '^';
+	memcpy(dst, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+	dst += sizeof(CSCOPE_RE_SPACE) - 1;
+
+	/* Translate the line itself. */
+	for (i = 0; i < srclen; ++i) {
+		if (src[i] == ' ') {
+			memcpy(dst,
+			    CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+			dst += sizeof(CSCOPE_RE_SPACE) - 1;
+			continue;
+		}
+		if (strchr("\\^.[]$*+?()|{}", src[i]))
+			*dst++ = '\\';
+		*dst++ = src[i];
+	}
+
+	/* Allow trailing white space, then anchor at the end. */
+	memcpy(dst, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+	dst += sizeof(CSCOPE_RE_SPACE) - 1;
+	*dst++ = '$';
+
+	*ptrnp = bp;
+	*plenp = dst - bp;
+	return (0);
+}
+
+/*
+ * re_error --
+ *	Put the regerror() text for errcode/preg on the message queue.
+ *	If memory for the text can't be allocated, a system error is
+ *	reported instead.
+ *
+ * PUBLIC: void re_error __P((SCR *, int, regex_t *));
+ */
+void
+re_error(sp, errcode, preg)
+	SCR *sp;
+	int errcode;
+	regex_t *preg;
+{
+	size_t need;
+	char *msg;
+
+	/* A zero-length buffer makes regerror() report the size needed. */
+	need = regerror(errcode, preg, "", 0);
+
+	msg = malloc(need);
+	if (msg == NULL) {
+		msgq(sp, M_SYSERR, NULL);
+		return;
+	}
+
+	(void)regerror(errcode, preg, msg, need);
+	msgq(sp, M_ERR, "RE error: %s", msg);
+	free(msg);
+}
+
+/*
+ * re_sub --
+ * Do the substitution for a regular expression.
+ *
+ * Appends the expansion of sp->repl for one match to the build buffer.
+ * The buffer pointer, its current length and its allocated length come
+ * in through lbp, lbclenp and lblenp and are written back before the
+ * "return (0)". The offsets in match[] are applied to ip.
+ *
+ * A carriage return or newline in the replacement text is copied like
+ * any other character, and its offset in the build buffer is also
+ * recorded in sp->newl. Matched text that is copied in is not checked
+ * for newlines.
+ *
+ * Returns 0 on success. The NEEDNEWLINE and NEEDSP macros return 1
+ * directly from this function if their buffer pointer is NULL after
+ * REALLOC.
+ * NOTE(review): on that path *lbp, *lbclenp and *lblenp are not updated,
+ * even if the buffer was reallocated earlier in the same call -- confirm
+ * the caller's cleanup copes with that.
+ */
+static int
+re_sub(sp, ip, lbp, lbclenp, lblenp, match)
+ SCR *sp;
+ char *ip; /* Input line. */
+ char **lbp;
+ size_t *lbclenp, *lblenp;
+ regmatch_t match[10];
+{
+ enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
+ size_t lbclen, lblen; /* Local copies. */
+ size_t mlen; /* Match length. */
+ size_t rpl; /* Remaining replacement length. */
+ char *rp; /* Replacement pointer. */
+ int ch;
+ int no; /* Match replacement offset. */
+ char *p, *t; /* Buffer pointers. */
+ char *lb; /* Local copies. */
+
+ lb = *lbp; /* Get local copies. */
+ lbclen = *lbclenp;
+ lblen = *lblenp;
+
+ /*
+ * QUOTING NOTE:
+ *
+ * There are some special sequences that vi provides in the
+ * replacement patterns.
+ * & string the RE matched (\& if nomagic set)
+ * \# n-th regular subexpression
+ * \E end \U, \L conversion
+ * \e end \U, \L conversion
+ * \l convert the next character to lower-case
+ * \L convert to lower-case, until \E, \e, or end of replacement
+ * \u convert the next character to upper-case
+ * \U convert to upper-case, until \E, \e, or end of replacement
+ *
+ * Otherwise, since this is the lowest level of replacement, discard
+ * all escaping characters. This (hopefully) matches historic practice.
+ *
+ * OUTCH appends one character at p. If nltrans is set and the
+ * character's key value is K_CR or K_NL, its offset is recorded in
+ * sp->newl; otherwise any pending case conversion is applied to it.
+ */
+#define OUTCH(ch, nltrans) { \
+ CHAR_T __ch = (ch); \
+ u_int __value = KEY_VAL(sp, __ch); \
+ if (nltrans && (__value == K_CR || __value == K_NL)) { \
+ NEEDNEWLINE(sp); \
+ sp->newl[sp->newl_cnt++] = lbclen; \
+ } else if (conv != C_NOTSET) { \
+ switch (conv) { \
+ case C_ONELOWER: \
+ conv = C_NOTSET; \
+ /* FALLTHROUGH */ \
+ case C_LOWER: \
+ if (isupper(__ch)) \
+ __ch = tolower(__ch); \
+ break; \
+ case C_ONEUPPER: \
+ conv = C_NOTSET; \
+ /* FALLTHROUGH */ \
+ case C_UPPER: \
+ if (islower(__ch)) \
+ __ch = toupper(__ch); \
+ break; \
+ default: \
+ abort(); \
+ } \
+ } \
+ NEEDSP(sp, 1, p); \
+ *p++ = __ch; \
+ ++lbclen; \
+}
+ conv = C_NOTSET;
+ for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
+ switch (ch = *rp++) {
+ case '&':
+ if (O_ISSET(sp, O_MAGIC)) {
+ no = 0;
+ goto subzero; /* Jumps into the '\\' switch below. */
+ }
+ break;
+ case '\\':
+ if (rpl == 0) /* Trailing backslash is output as is. */
+ break;
+ --rpl;
+ switch (ch = *rp) {
+ case '&':
+ ++rp;
+ if (!O_ISSET(sp, O_MAGIC)) {
+ no = 0;
+ goto subzero;
+ }
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ no = *rp++ - '0';
+subzero: if (match[no].rm_so == -1 ||
+ match[no].rm_eo == -1)
+ break; /* Falls to OUTCH(ch, 1) below. */
+ mlen = match[no].rm_eo - match[no].rm_so;
+ for (t = ip + match[no].rm_so; mlen--; ++t)
+ OUTCH(*t, 0);
+ continue;
+ case 'e':
+ case 'E':
+ ++rp;
+ conv = C_NOTSET;
+ continue;
+ case 'l':
+ ++rp;
+ conv = C_ONELOWER;
+ continue;
+ case 'L':
+ ++rp;
+ conv = C_LOWER;
+ continue;
+ case 'u':
+ ++rp;
+ conv = C_ONEUPPER;
+ continue;
+ case 'U':
+ ++rp;
+ conv = C_UPPER;
+ continue;
+ default:
+ ++rp; /* Drop the backslash, keep the character. */
+ break;
+ }
+ }
+ OUTCH(ch, 1);
+ }
+
+ *lbp = lb; /* Update caller's information. */
+ *lbclenp = lbclen;
+ *lblenp = lblen;
+ return (0);
+}