1 files changed, 1459 insertions, 0 deletions
diff --git a/ex/ex_subst.c b/ex/ex_subst.c
new file mode 100644
index 000000000000..0ebb81dd58e7
--- /dev/null
+++ b/ex/ex_subst.c
@@ -0,0 +1,1459 @@
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 1992, 1993, 1994, 1995, 1996
+ *	Keith Bostic.  All rights reserved.
+ *
+ * See the LICENSE file for redistribution information.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)ex_subst.c	10.37 (Berkeley) 9/15/96";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+
+#include <bitstring.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../common/common.h"
+#include "../vi/vi.h"
+
+#define	SUB_FIRST	0x01		/* ex_s() compiled a new RE; s() rejects the 'r' flag. */
+#define	SUB_MUSTSETR	0x02		/* No pattern was given; s() requires the 'r' flag. */
+
+static int re_conv __P((SCR *, char **, size_t *, int *));
+static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
+static int re_sub __P((SCR *,
+		char *, char **, size_t *, size_t *, regmatch_t [10]));
+static int re_tag_conv __P((SCR *, char **, size_t *, int *));
+static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
+
+/*
+ * ex_s --
+ *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
+ *
+ *	Substitute on lines matching a pattern.
+ *
+ *	Splits cmdp->argv[0] into delimiter, pattern and replacement,
+ *	compiles the pattern (or reuses the last RE if the pattern is
+ *	empty), stores the expanded replacement in sp->repl/sp->repl_len
+ *	and passes whatever follows the replacement to s() as the flag
+ *	string.  With no arguments, or only blanks, it behaves like
+ *	ex_subagain().
+ *
+ *	Returns 1 on error, otherwise the return value of s().
+ *
+ * PUBLIC: int ex_s __P((SCR *, EXCMD *));
+ */
+int
+ex_s(sp, cmdp)
+	SCR *sp;
+	EXCMD *cmdp;
+{
+	regex_t *re;
+	size_t blen, len;
+	u_int flags;
+	int delim;
+	char *bp, *ptrn, *rep, *p, *t;
+
+	/*
+	 * Skip leading white space.
+	 *
+	 * !!!
+	 * Historic vi allowed any non-alphanumeric to serve as the
+	 * substitution command delimiter.
+	 *
+	 * !!!
+	 * If the arguments are empty, it's the same as &, i.e. we
+	 * repeat the last substitution.
+	 */
+	if (cmdp->argc == 0)
+		goto subagain;
+	for (p = cmdp->argv[0]->bp,
+	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
+		/* The <ctype.h> functions need an unsigned char value. */
+		if (!isblank((unsigned char)*p))
+			break;
+	}
+	if (len == 0)
+subagain:	return (ex_subagain(sp, cmdp));
+
+	/*
+	 * An alphanumeric or backslash can't be a delimiter, so what
+	 * follows is a flag string and the 'r' flag has to be in it.
+	 */
+	delim = *p++;
+	if (isalnum((unsigned char)delim) || delim == '\\')
+		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
+
+	/*
+	 * !!!
+	 * The full-blown substitute command reset the remembered
+	 * state of the 'c' and 'g' suffixes.
+	 */
+	sp->c_suffix = sp->g_suffix = 0;
+
+	/*
+	 * Get the pattern string, toss escaping characters.
+	 *
+	 * !!!
+	 * Historic vi accepted any of the following forms:
+	 *
+	 *	:s/abc/def/		change "abc" to "def"
+	 *	:s/abc/def		change "abc" to "def"
+	 *	:s/abc/			delete "abc"
+	 *	:s/abc			delete "abc"
+	 *
+	 * QUOTING NOTE:
+	 *
+	 * Only toss an escaping character if it escapes a delimiter.
+	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
+	 * would be nice to be more regular, i.e. for each layer of
+	 * escaping a single escaping character is removed, but that's
+	 * not how the historic vi worked.
+	 */
+	for (ptrn = t = p;;) {
+		if (p[0] == '\0' || p[0] == delim) {
+			if (p[0] == delim)
+				++p;
+			/*
+			 * !!!
+			 * Nul terminate the pattern string -- it's passed
+			 * to regcomp which doesn't understand anything else.
+			 */
+			*t = '\0';
+			break;
+		}
+		if (p[0] == '\\') {
+			if (p[1] == delim)
+				++p;
+			else if (p[1] == '\\')
+				*t++ = *p++;
+		}
+		*t++ = *p++;
+	}
+
+	/*
+	 * If the pattern string is empty, use the last RE (not just the
+	 * last substitution RE).
+	 */
+	if (*ptrn == '\0') {
+		if (sp->re == NULL) {
+			ex_emsg(sp, NULL, EXM_NOPREVRE);
+			return (1);
+		}
+
+		/* Re-compile the RE if necessary. */
+		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
+		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
+			return (1);
+		flags = 0;
+	} else {
+		/*
+		 * !!!
+		 * Compile the RE.  Historic practice is that substitutes set
+		 * the search direction as well as both substitute and search
+		 * RE's.  We compile the RE twice, as we don't want to bother
+		 * ref counting the pattern string and (opaque) structure.
+		 */
+		if (re_compile(sp, ptrn, t - ptrn,
+		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
+			return (1);
+		if (re_compile(sp, ptrn, t - ptrn,
+		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
+			return (1);
+
+		flags = SUB_FIRST;
+		sp->searchdir = FORWARD;
+	}
+	re = &sp->re_c;
+
+	/*
+	 * Get the replacement string.
+	 *
+	 * The special character & (\& if O_MAGIC not set) matches the
+	 * entire RE.  No handling of & is required here, it's done by
+	 * re_sub().
+	 *
+	 * The special character ~ (\~ if O_MAGIC not set) inserts the
+	 * previous replacement string into this replacement string.
+	 * Count ~'s to figure out how much space we need.  We could
+	 * special case nonexistent last patterns or whether or not
+	 * O_MAGIC is set, but it's probably not worth the effort.
+	 *
+	 * QUOTING NOTE:
+	 *
+	 * Only toss an escaping character if it escapes a delimiter or
+	 * if O_MAGIC is set and it escapes a tilde.
+	 *
+	 * !!!
+	 * If the entire replacement pattern is "%", then use the last
+	 * replacement pattern.  This semantic was added to vi in System
+	 * V and then percolated elsewhere, presumably around the time
+	 * that it was added to their version of ed(1).
+	 */
+	if (p[0] == '\0' || p[0] == delim) {
+		if (p[0] == delim)
+			++p;
+		if (sp->repl != NULL)
+			free(sp->repl);
+		sp->repl = NULL;
+		sp->repl_len = 0;
+	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
+		p += p[1] == delim ? 2 : 1;
+	else {
+		/* First pass: upper bound on the expanded length. */
+		for (rep = p, len = 0;
+		    p[0] != '\0' && p[0] != delim; ++p, ++len)
+			if (p[0] == '~')
+				len += sp->repl_len;
+		GET_SPACE_RET(sp, bp, blen, len);
+
+		/* Second pass: build the expanded replacement in bp. */
+		for (t = bp, len = 0, p = rep;;) {
+			if (p[0] == '\0' || p[0] == delim) {
+				if (p[0] == delim)
+					++p;
+				break;
+			}
+			if (p[0] == '\\') {
+				if (p[1] == delim)
+					++p;
+				else if (p[1] == '\\') {
+					*t++ = *p++;
+					++len;
+				} else if (p[1] == '~') {
+					++p;
+					if (!O_ISSET(sp, O_MAGIC))
+						goto tilde;
+				}
+			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
+tilde:				++p;
+				memcpy(t, sp->repl, sp->repl_len);
+				t += sp->repl_len;
+				len += sp->repl_len;
+				continue;
+			}
+			*t++ = *p++;
+			++len;
+		}
+
+		/* Remember the new replacement string. */
+		sp->repl_len = len;
+		if (len != 0) {
+			if (sp->repl != NULL)
+				free(sp->repl);
+			if ((sp->repl = malloc(len)) == NULL) {
+				/*
+				 * The old string has been freed; never leave
+				 * a non-zero length paired with a NULL
+				 * pointer for a later '~' expansion to copy.
+				 */
+				sp->repl_len = 0;
+				msgq(sp, M_SYSERR, NULL);
+				FREE_SPACE(sp, bp, blen);
+				return (1);
+			}
+			memcpy(sp->repl, bp, len);
+		}
+		FREE_SPACE(sp, bp, blen);
+	}
+	return (s(sp, cmdp, p, re, flags));
+}
+
+/*
+ * ex_subagain --
+ *	[line [,line]] & [cgr] [count] [#lp]]
+ *
+ *	Repeat the substitution: the saved substitute RE is applied with
+ *	the saved replacement pattern.
+ *
+ * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
+ */
+int
+ex_subagain(sp, cmdp)
+	SCR *sp;
+	EXCMD *cmdp;
+{
+	char *flagstr;
+
+	/* Without a remembered substitute RE there is nothing to repeat. */
+	if (sp->subre == NULL) {
+		ex_emsg(sp, NULL, EXM_NOPREVRE);
+		return (1);
+	}
+
+	/* The saved pattern text may have to be compiled again. */
+	if (!F_ISSET(sp, SC_RE_SUBST)) {
+		if (re_compile(sp, sp->subre,
+		    sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
+			return (1);
+	}
+
+	/* The first argument, if there is one, is the flag string. */
+	flagstr = cmdp->argc ? cmdp->argv[0]->bp : NULL;
+	return (s(sp, cmdp, flagstr, &sp->subre_c, 0));
+}
+
+/*
+ * ex_subtilde --
+ *	[line [,line]] ~ [cgr] [count] [#lp]]
+ *
+ *	Like ex_subagain(), except that the last RE of any kind is used
+ *	with the last substitute replacement pattern.
+ *
+ * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
+ */
+int
+ex_subtilde(sp, cmdp)
+	SCR *sp;
+	EXCMD *cmdp;
+{
+	char *flagstr;
+
+	/* There has to be a remembered RE to work with. */
+	if (sp->re == NULL) {
+		ex_emsg(sp, NULL, EXM_NOPREVRE);
+		return (1);
+	}
+
+	/* The saved pattern text may have to be compiled again. */
+	if (!F_ISSET(sp, SC_RE_SEARCH)) {
+		if (re_compile(sp, sp->re,
+		    sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
+			return (1);
+	}
+
+	/* The first argument, if there is one, is the flag string. */
+	flagstr = cmdp->argc ? cmdp->argv[0]->bp : NULL;
+	return (s(sp, cmdp, flagstr, &sp->re_c, 0));
+}
+
+/*
+ * s --
+ * Do the substitution.  This stuff is *really* tricky.  There are lots of
+ * special cases, and general nastiness.  Don't mess with it unless you're
+ * pretty confident.
+ * 
+ * The nasty part of the substitution is what happens when the replacement
+ * string contains newlines.  It's a bit tricky -- consider the information
+ * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
+ * to build a set of newline offsets which we use to break the line up later,
+ * when the replacement is done.  Don't change it unless you're *damned*
+ * confident.
+ *
+ * Helper macros:
+ *
+ * NEEDNEWLINE	grows sp->newl by 25 entries once sp->newl_cnt has reached
+ *		sp->newl_len.
+ * BUILD	appends len bytes from l to the build buffer, growing the
+ *		buffer first if they don't fit.
+ * NEEDSP	grows the build buffer so len more bytes fit and, if it grew,
+ *		resets pnt to lb + lbclen.
+ *
+ * BUILD and NEEDSP operate on the caller's local variables lb, lbclen and
+ * lblen.  All three macros expand to a bare brace block (some call sites
+ * omit the trailing semicolon) and execute "return (1)" in the calling
+ * function if REALLOC leaves the pointer NULL.
+ */
+#define	NEEDNEWLINE(sp) {						\
+	if (sp->newl_len == sp->newl_cnt) {				\
+		sp->newl_len += 25;					\
+		REALLOC(sp, sp->newl, size_t *,				\
+		    sp->newl_len * sizeof(size_t));			\
+		if (sp->newl == NULL) {					\
+			sp->newl_len = 0;				\
+			return (1);					\
+		}							\
+	}								\
+}
+
+#define	BUILD(sp, l, len) {						\
+	if (lbclen + (len) > lblen) {					\
+		lblen += MAX(lbclen + (len), 256);			\
+		REALLOC(sp, lb, char *, lblen);				\
+		if (lb == NULL) {					\
+			lbclen = 0;					\
+			return (1);					\
+		}							\
+	}								\
+	memcpy(lb + lbclen, l, len);					\
+	lbclen += len;							\
+}
+
+#define	NEEDSP(sp, len, pnt) {						\
+	if (lbclen + (len) > lblen) {					\
+		lblen += MAX(lbclen + (len), 256);			\
+		REALLOC(sp, lb, char *, lblen);				\
+		if (lb == NULL) {					\
+			lbclen = 0;					\
+			return (1);					\
+		}							\
+		pnt = lb + lbclen;					\
+	}								\
+}
+
+/*
+ * s --
+ *	Run the substitution over the lines cmdp->addr1.lno through
+ *	cmdp->addr2.lno.
+ *
+ *	s:	flag/count string following the replacement, or NULL.
+ *	re:	compiled RE to match with (the 'r' flag switches it to
+ *		sp->re_c).
+ *	flags:	SUB_FIRST and/or SUB_MUSTSETR.
+ *
+ *	Returns 0 on success, 1 on failure.
+ */
+static int
+s(sp, cmdp, s, re, flags)
+	SCR *sp;
+	EXCMD *cmdp;
+	char *s;
+	regex_t *re;
+	u_int flags;
+{
+	EVENT ev;
+	MARK from, to;
+	TEXTH tiq;
+	recno_t elno, lno, slno;
+	regmatch_t match[10];
+	size_t blen, cnt, last, lbclen, lblen, len, llen;
+	size_t offset, saved_offset, scno;
+	unsigned long ul;
+	int cflag, lflag, nflag, pflag, rflag;
+	int didsub, do_eol_match, eflags, empty_ok, eval;
+	int linechanged, matched, quit, rval;
+	char *bp, *lb;
+
+	NEEDFILE(sp, cmdp);
+
+	slno = sp->lno;
+	scno = sp->cno;
+
+	/*
+	 * !!!
+	 * Historically, the 'g' and 'c' suffixes were always toggled as flags,
+	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
+	 * not set, they were initialized to 0 for all substitute commands.  If
+	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
+	 * specified substitute/replacement patterns (see ex_s()).
+	 */
+	if (!O_ISSET(sp, O_EDCOMPATIBLE))
+		sp->c_suffix = sp->g_suffix = 0;
+
+	/*
+	 * Ex text structure initialization.  This is done unconditionally:
+	 * when O_EDCOMPATIBLE is set the 'c' suffix can be remembered from
+	 * an earlier command, in which case confirmation input is read into
+	 * tiq without a 'c' flag ever being parsed below.
+	 */
+	memset(&tiq, 0, sizeof(TEXTH));
+	CIRCLEQ_INIT(&tiq);
+
+	/*
+	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
+	 * it only displayed the last change.  I'd disallow them, but they are
+	 * useful in combination with the [v]global commands.  In the current
+	 * model the problem is combining them with the 'c' flag -- the screen
+	 * would have to flip back and forth between the confirm screen and the
+	 * ex print screen, which would be pretty awful.  We do display all
+	 * changes, though, for what that's worth.
+	 *
+	 * !!!
+	 * Historic vi was fairly strict about the order of "options", the
+	 * count, and "flags".  I'm somewhat fuzzy on the difference between
+	 * options and flags, anyway, so this is a simpler approach, and we
+	 * just take them in whatever order the user gives them.  (The ex
+	 * usage statement doesn't reflect this.)
+	 */
+	cflag = lflag = nflag = pflag = rflag = 0;
+	if (s == NULL)
+		goto noargs;
+	for (lno = OOBLNO; *s != '\0'; ++s)
+		switch (*s) {
+		case ' ':
+		case '\t':
+			continue;
+		case '+':
+			++cmdp->flagoff;
+			break;
+		case '-':
+			--cmdp->flagoff;
+			break;
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			if (lno != OOBLNO)
+				goto usage;
+			errno = 0;
+			ul = strtoul(s, &s, 10);
+			if (*s == '\0')		/* Loop increment correction. */
+				--s;
+			/*
+			 * strtoul() only ever fails by overflowing, and a
+			 * value that doesn't survive the conversion to a
+			 * recno_t is too big as well.
+			 */
+			if (errno == ERANGE || (unsigned long)(recno_t)ul != ul) {
+				msgq(sp, M_ERR, "153|Count overflow");
+				return (1);
+			}
+			lno = (recno_t)ul;
+			/*
+			 * In historic vi, the count was inclusive from the
+			 * second address.
+			 */
+			cmdp->addr1.lno = cmdp->addr2.lno;
+			cmdp->addr2.lno += lno - 1;
+			if (!db_exist(sp, cmdp->addr2.lno) &&
+			    db_last(sp, &cmdp->addr2.lno))
+				return (1);
+			break;
+		case '#':
+			nflag = 1;
+			break;
+		case 'c':
+			sp->c_suffix = !sp->c_suffix;
+			break;
+		case 'g':
+			sp->g_suffix = !sp->g_suffix;
+			break;
+		case 'l':
+			lflag = 1;
+			break;
+		case 'p':
+			pflag = 1;
+			break;
+		case 'r':
+			if (LF_ISSET(SUB_FIRST)) {
+				msgq(sp, M_ERR,
+		    "155|Regular expression specified; r flag meaningless");
+				return (1);
+			}
+			if (!F_ISSET(sp, SC_RE_SEARCH)) {
+				ex_emsg(sp, NULL, EXM_NOPREVRE);
+				return (1);
+			}
+			rflag = 1;
+			re = &sp->re_c;
+			break;
+		default:
+			goto usage;
+		}
+
+	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
+usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
+		return (1);
+	}
+
+noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
+		msgq(sp, M_ERR,
+"156|The #, l and p flags may not be combined with the c flag in vi mode");
+		return (1);
+	}
+
+	/*
+	 * bp:		if interactive, line cache
+	 * blen:	if interactive, line cache length
+	 * lb:		build buffer pointer.
+	 * lbclen:	current length of built buffer.
+	 * lblen;	length of build buffer.
+	 */
+	bp = lb = NULL;
+	blen = lbclen = lblen = 0;
+
+	/* For each line... */
+	for (matched = quit = 0, lno = cmdp->addr1.lno,
+	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
+
+		/* Someone's unhappy, time to stop. */
+		if (INTERRUPTED(sp))
+			break;
+
+		/* Get the line. */
+		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
+			goto err;
+
+		/*
+		 * Make a local copy if doing confirmation -- when calling
+		 * the confirm routine we're likely to lose the cached copy.
+		 */
+		if (sp->c_suffix) {
+			if (bp == NULL) {
+				GET_SPACE_RET(sp, bp, blen, llen);
+			} else
+				ADD_SPACE_RET(sp, bp, blen, llen);
+			memcpy(bp, s, llen);
+			s = bp;
+		}
+
+		/* Start searching from the beginning. */
+		offset = 0;
+		len = llen;
+
+		/* Reset the build buffer offset. */
+		lbclen = 0;
+
+		/* Reset empty match flag. */
+		empty_ok = 1;
+
+		/*
+		 * We don't want to have to do a setline if the line didn't
+		 * change -- keep track of whether or not this line changed.
+		 * If doing confirmations, don't want to keep setting the
+		 * line if change is refused -- keep track of substitutions.
+		 */
+		didsub = linechanged = 0;
+
+		/* New line, do an EOL match. */
+		do_eol_match = 1;
+
+		/* It's not nul terminated, but we pretend it is. */
+		eflags = REG_STARTEND;
+
+		/*
+		 * The search area is from s + offset to the EOL.
+		 *
+		 * Generally, match[0].rm_so is the offset of the start
+		 * of the match from the start of the search, and offset
+		 * is the offset of the start of the last search.
+		 */
+nextmatch:	match[0].rm_so = 0;
+		match[0].rm_eo = len;
+
+		/* Get the next match. */
+		eval = regexec(re, (char *)s + offset, 10, match, eflags);
+
+		/*
+		 * There wasn't a match or if there was an error, deal with
+		 * it.  If there was a previous match in this line, resolve
+		 * the changes into the database.  Otherwise, just move on.
+		 */
+		if (eval == REG_NOMATCH)
+			goto endmatch;
+		if (eval != 0) {
+			re_error(sp, eval, re);
+			goto err;
+		}
+		matched = 1;
+
+		/* Only the first search can match an anchored expression. */
+		eflags |= REG_NOTBOL;
+
+		/*
+		 * !!!
+		 * It's possible to match 0-length strings -- for example, the
+		 * command s;a*;X;, when matched against the string "aabb" will
+		 * result in "XbXbX", i.e. the matches are "aa", the space
+		 * between the b's and the space between the b's and the end of
+		 * the string.  There is a similar space between the beginning
+		 * of the string and the a's.  The rule that we use (because vi
+		 * historically used it) is that any 0-length match, occurring
+		 * immediately after a match, is ignored.  Otherwise, the above
+		 * example would have resulted in "XXbXbX".  Another example is
+		 * incorrectly using " *" to replace groups of spaces with one
+		 * space.
+		 *
+		 * The way we do this is that if we just had a successful match,
+		 * the starting offset does not skip characters, and the match
+		 * is empty, ignore the match and move forward.  If there's no
+		 * more characters in the string, we were attempting to match
+		 * after the last character, so quit.
+		 */
+		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
+			empty_ok = 1;
+			if (len == 0)
+				goto endmatch;
+			BUILD(sp, s + offset, 1)
+			++offset;
+			--len;
+			goto nextmatch;
+		}
+
+		/* Confirm change. */
+		if (sp->c_suffix) {
+			/*
+			 * Set the cursor position for confirmation.  Note,
+			 * if we matched on a '$', the cursor may be past
+			 * the end of line.
+			 */
+			from.lno = to.lno = lno;
+			from.cno = match[0].rm_so + offset;
+			to.cno = match[0].rm_eo + offset;
+			/*
+			 * Both ex and vi have to correct for a change before
+			 * the first character in the line.
+			 */
+			if (llen == 0)
+				from.cno = to.cno = 0;
+			if (F_ISSET(sp, SC_VI)) {
+				/*
+				 * Only vi has to correct for a change after
+				 * the last character in the line.  An empty
+				 * line was already set to column 0 above, and
+				 * "llen - 1" would wrap around for it.
+				 *
+				 * XXX
+				 * It would be nice to change the vi code so
+				 * that we could display a cursor past EOL.
+				 */
+				if (llen != 0) {
+					if (to.cno >= llen)
+						to.cno = llen - 1;
+					if (from.cno >= llen)
+						from.cno = llen - 1;
+				}
+
+				sp->lno = from.lno;
+				sp->cno = from.cno;
+				if (vs_refresh(sp, 1))
+					goto err;
+
+				vs_update(sp, msg_cat(sp,
+				    "169|Confirm change? [n]", NULL), NULL);
+
+				if (v_event_get(sp, &ev, 0, 0))
+					goto err;
+				switch (ev.e_event) {
+				case E_CHARACTER:
+					break;
+				case E_EOF:
+				case E_ERR:
+				case E_INTERRUPT:
+					goto lquit;
+				default:
+					v_event_err(sp, &ev);
+					goto lquit;
+				}
+			} else {
+				if (ex_print(sp, cmdp, &from, &to, 0) ||
+				    ex_scprint(sp, &from, &to))
+					goto lquit;
+				if (ex_txt(sp, &tiq, 0, TXT_CR))
+					goto err;
+				ev.e_c = tiq.cqh_first->lb[0];
+			}
+
+			switch (ev.e_c) {
+			case CH_YES:
+				break;
+			default:
+			case CH_NO:
+				didsub = 0;
+				BUILD(sp, s +offset, match[0].rm_eo);
+				goto skip;
+			case CH_QUIT:
+				/* Set the quit/interrupted flags. */
+lquit:				quit = 1;
+				F_SET(sp->gp, G_INTERRUPTED);
+
+				/*
+				 * Resolve any changes, then return to (and
+				 * exit from) the main loop.
+				 */
+				goto endmatch;
+			}
+		}
+
+		/*
+		 * Set the cursor to the last position changed, converting
+		 * from 1-based to 0-based.
+		 */
+		sp->lno = lno;
+		sp->cno = match[0].rm_so;
+
+		/* Copy the bytes before the match into the build buffer. */
+		BUILD(sp, s + offset, match[0].rm_so);
+
+		/* Substitute the matching bytes. */
+		didsub = 1;
+		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
+			goto err;
+
+		/* Set the change flag so we know this line was modified. */
+		linechanged = 1;
+
+		/* Move past the matched bytes. */
+skip:		offset += match[0].rm_eo;
+		len -= match[0].rm_eo;
+
+		/* A match cannot be followed by an empty pattern. */
+		empty_ok = 0;
+
+		/*
+		 * If doing a global change with confirmation, we have to
+		 * update the screen.  The basic idea is to store the line
+		 * so the screen update routines can find it, and restart.
+		 */
+		if (didsub && sp->c_suffix && sp->g_suffix) {
+			/*
+			 * The new search offset will be the end of the
+			 * modified line.
+			 */
+			saved_offset = lbclen;
+
+			/* Copy the rest of the line. */
+			if (len)
+				BUILD(sp, s + offset, len)
+
+			/* Set the new offset. */
+			offset = saved_offset;
+
+			/* Store inserted lines, adjusting the build buffer. */
+			last = 0;
+			if (sp->newl_cnt) {
+				for (cnt = 0;
+				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
+					if (db_insert(sp, lno,
+					    lb + last, sp->newl[cnt] - last))
+						goto err;
+					last = sp->newl[cnt] + 1;
+					++sp->rptlines[L_ADDED];
+				}
+				lbclen -= last;
+				offset -= last;
+				sp->newl_cnt = 0;
+			}
+
+			/* Store and retrieve the line. */
+			if (db_set(sp, lno, lb + last, lbclen))
+				goto err;
+			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
+				goto err;
+			ADD_SPACE_RET(sp, bp, blen, llen)
+			memcpy(bp, s, llen);
+			s = bp;
+			len = llen - offset;
+
+			/* Restart the build. */
+			lbclen = 0;
+			BUILD(sp, s, offset);
+
+			/*
+			 * If we haven't already done the after-the-string
+			 * match, do one.  Set REG_NOTEOL so the '$' pattern
+			 * only matches once.
+			 *
+			 * The end of the string is reached when nothing is
+			 * left to search, i.e. len is 0, exactly as in the
+			 * non-confirming case below.  (Comparing offset with
+			 * len triggered in the middle of the line and ended
+			 * the substitution early.)
+			 */
+			if (!do_eol_match)
+				goto endmatch;
+			if (len == 0) {
+				do_eol_match = 0;
+				eflags |= REG_NOTEOL;
+			}
+			goto nextmatch;
+		}
+
+		/*
+		 * If it's a global:
+		 *
+		 * If at the end of the string, do a test for the after
+		 * the string match.  Set REG_NOTEOL so the '$' pattern
+		 * only matches once.
+		 */
+		if (sp->g_suffix && do_eol_match) {
+			if (len == 0) {
+				do_eol_match = 0;
+				eflags |= REG_NOTEOL;
+			}
+			goto nextmatch;
+		}
+
+endmatch:	if (!linechanged)
+			continue;
+
+		/* Copy any remaining bytes into the build buffer. */
+		if (len)
+			BUILD(sp, s + offset, len)
+
+		/* Store inserted lines, adjusting the build buffer. */
+		last = 0;
+		if (sp->newl_cnt) {
+			for (cnt = 0;
+			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
+				if (db_insert(sp,
+				    lno, lb + last, sp->newl[cnt] - last))
+					goto err;
+				last = sp->newl[cnt] + 1;
+				++sp->rptlines[L_ADDED];
+			}
+			lbclen -= last;
+			sp->newl_cnt = 0;
+		}
+
+		/* Store the changed line. */
+		if (db_set(sp, lno, lb + last, lbclen))
+			goto err;
+
+		/* Update changed line counter. */
+		if (sp->rptlchange != lno) {
+			sp->rptlchange = lno;
+			++sp->rptlines[L_CHANGED];
+		}
+
+		/*
+		 * !!!
+		 * Display as necessary.  Historic practice is to only
+		 * display the last line of a line split into multiple
+		 * lines.
+		 */
+		if (lflag || nflag || pflag) {
+			from.lno = to.lno = lno;
+			from.cno = to.cno = 0;
+			if (lflag)
+				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
+			if (nflag)
+				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
+			if (pflag)
+				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
+		}
+	}
+
+	/*
+	 * !!!
+	 * Historically, vi attempted to leave the cursor at the same place if
+	 * the substitution was done at the current cursor position.  Otherwise
+	 * it moved it to the first non-blank of the last line changed.  There
+	 * were some problems: for example, :s/$/foo/ with the cursor on the
+	 * last character of the line left the cursor on the last character, or
+	 * the & command with multiple occurrences of the matching string in the
+	 * line usually left the cursor in a fairly random position.
+	 *
+	 * We try to do the same thing, with the exception that if the user is
+	 * doing substitution with confirmation, we move to the last line about
+	 * which the user was consulted, as opposed to the last line that they
+	 * actually changed.  This prevents a screen flash if the user doesn't
+	 * change many of the possible lines.
+	 */
+	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
+		sp->cno = 0;
+		(void)nonblank(sp, sp->lno, &sp->cno);
+	}
+
+	/*
+	 * If not in a global command, and nothing matched, say so.
+	 * Else, if none of the lines displayed, put something up.
+	 */
+	rval = 0;
+	if (!matched) {
+		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
+			msgq(sp, M_ERR, "157|No match found");
+			goto err;
+		}
+	} else if (!lflag && !nflag && !pflag)
+		F_SET(cmdp, E_AUTOPRINT);
+
+	if (0) {
+err:		rval = 1;
+	}
+
+	if (bp != NULL)
+		FREE_SPACE(sp, bp, blen);
+	if (lb != NULL)
+		free(lb);
+	return (rval);
+}
+
+/*
+ * re_compile --
+ *	Compile the RE.
+ *
+ *	ptrn, plen:	the pattern text and its length.
+ *	ptrnp, lenp:	if ptrnp is not NULL, the pattern is first converted
+ *			to a POSIX RE and a nul-terminated copy is saved in
+ *			*ptrnp, replacing any previous value (the converted
+ *			length is stored in *lenp if lenp is not NULL).  If
+ *			ptrnp is NULL, ptrn is handed to regcomp() unchanged.
+ *	rep:		where the compiled RE is stored.
+ *	flags:		RE_C_* flags: which conversion to use, which saved
+ *			RE is being replaced, and whether errors are silent.
+ *
+ *	Returns 0 on success, 1 on failure.
+ *
+ * PUBLIC: int re_compile __P((SCR *,
+ * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
+ */
+int
+re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
+	SCR *sp;
+	char *ptrn, **ptrnp;
+	size_t plen, *lenp;
+	regex_t *rep;
+	u_int flags;
+{
+	size_t len;
+	int reflags, replaced, rval;
+	char *p;
+
+	/* Set RE flags. */
+	reflags = 0;
+	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
+		if (O_ISSET(sp, O_EXTENDED))
+			reflags |= REG_EXTENDED;
+		if (O_ISSET(sp, O_IGNORECASE))
+			reflags |= REG_ICASE;
+		if (O_ISSET(sp, O_ICLOWER)) {
+			/*
+			 * Ignore case only if the pattern has no upper-case
+			 * character.  The <ctype.h> functions need an
+			 * unsigned char value, a plain char may be negative.
+			 */
+			for (p = ptrn, len = plen; len > 0; ++p, --len)
+				if (isupper((unsigned char)*p))
+					break;
+			if (len == 0)
+				reflags |= REG_ICASE;
+		}
+	}
+
+	/* If we're replacing a saved value, clear the old one. */
+	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
+		regfree(&sp->re_c);
+		F_CLR(sp, SC_RE_SEARCH);
+	}
+	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
+		regfree(&sp->subre_c);
+		F_CLR(sp, SC_RE_SUBST);
+	}
+
+	/*
+	 * If we're saving the string, it's a pattern we haven't seen before,
+	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
+	 * later recompilation.   Free any previously saved value.
+	 */
+	if (ptrnp != NULL) {
+		if (LF_ISSET(RE_C_CSCOPE)) {
+			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
+				return (1);
+			/*
+			 * XXX
+			 * Currently, the match-any-<blank> expression used in
+			 * re_cscope_conv() requires extended RE's.  This may
+			 * not be right or safe.
+			 */
+			reflags |= REG_EXTENDED;
+		} else if (LF_ISSET(RE_C_TAG)) {
+			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
+				return (1);
+		} else
+			if (re_conv(sp, &ptrn, &plen, &replaced))
+				return (1);
+
+		/* Discard previous pattern. */
+		if (*ptrnp != NULL) {
+			free(*ptrnp);
+			*ptrnp = NULL;
+		}
+		if (lenp != NULL)
+			*lenp = plen;
+
+		/*
+		 * Copy the string into allocated memory.
+		 *
+		 * XXX
+		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
+		 * for now.  There's just no other solution.
+		 */
+		MALLOC(sp, *ptrnp, char *, plen + 1);
+		if (*ptrnp != NULL) {
+			memcpy(*ptrnp, ptrn, plen);
+			(*ptrnp)[plen] = '\0';
+		}
+
+		/* Free up conversion-routine-allocated memory. */
+		if (replaced)
+			FREE_SPACE(sp, ptrn, 0);
+
+		if (*ptrnp == NULL)
+			return (1);
+
+		ptrn = *ptrnp;
+	}
+
+	/*
+	 * XXX
+	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
+	 * contained a nul.  Bummer!
+	 */
+	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
+		if (!LF_ISSET(RE_C_SILENT))
+			re_error(sp, rval, rep);
+		return (1);
+	}
+
+	/* Note which saved RE is now compiled. */
+	if (LF_ISSET(RE_C_SEARCH))
+		F_SET(sp, SC_RE_SEARCH);
+	if (LF_ISSET(RE_C_SUBST))
+		F_SET(sp, SC_RE_SUBST);
+
+	return (0);
+}
+
+/*
+ * re_conv --
+ *	Convert vi's regular expressions into something that the
+ *	POSIX 1003.2 RE functions can handle.
+ *
+ * There are three conversions we make to make vi's RE's (specifically
+ * the global, search, and substitute patterns) work with POSIX RE's.
+ *
+ * 1: If O_MAGIC is not set, strip backslashes from the magic character
+ *    set (.[*~) that have them, and add them to the ones that don't.
+ * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
+ *    from the last substitute command's replacement string.  If O_MAGIC
+ *    is set, it's the string "~".
+ * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
+ *    new RE escapes.
+ *
+ * On entry *ptrnp/*plenp describe the pattern.  If nothing had to be
+ * converted, *replacedp is set to 0 and the pattern is left alone.
+ * Otherwise *replacedp is set to 1 and *ptrnp/*plenp are changed to
+ * describe the converted pattern, held in GET_SPACE_RET memory.
+ * Returns 0 on success.
+ *
+ * !!!/XXX
+ * This doesn't exactly match the historic behavior of vi because we do
+ * the ~ substitution before calling the RE engine, so magic characters
+ * in the replacement string will be expanded by the RE engine, and they
+ * weren't historically.  It's a bug.
+ */
+static int
+re_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, len, needlen;
+	int magic;
+	char *bp, *p, *t;
+
+	/*
+	 * First pass through, we figure out how much space we'll need.
+	 * We do it in two passes, on the grounds that most of the time
+	 * the user is doing a search and won't have magic characters.
+	 * That way we can skip most of the memory allocation and copies.
+	 *
+	 * Every branch has to count at least the bytes that the matching
+	 * branch of the second pass stores, including the branches that
+	 * don't need a conversion themselves: once some other part of the
+	 * pattern sets "magic" the whole pattern is copied.
+	 */
+	magic = 0;
+	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
+		switch (*p) {
+		case '\\':
+			if (len > 1) {
+				--len;
+				switch (*++p) {
+				case '<':
+					magic = 1;
+					needlen += sizeof(RE_WSTART);
+					break;
+				case '>':
+					magic = 1;
+					needlen += sizeof(RE_WSTOP);
+					break;
+				case '~':
+					if (!O_ISSET(sp, O_MAGIC)) {
+						magic = 1;
+						needlen += sp->repl_len;
+					} else
+						needlen += 1;	/* "~" */
+					break;
+				case '.':
+				case '[':
+				case '*':
+					if (!O_ISSET(sp, O_MAGIC)) {
+						magic = 1;
+						needlen += 1;
+					} else
+						needlen += 2;	/* "\c" */
+					break;
+				default:
+					needlen += 2;
+				}
+			} else
+				needlen += 1;
+			break;
+		case '~':
+			if (O_ISSET(sp, O_MAGIC)) {
+				magic = 1;
+				needlen += sp->repl_len;
+			} else
+				needlen += 1;		/* "~" */
+			break;
+		case '.':
+		case '[':
+		case '*':
+			if (!O_ISSET(sp, O_MAGIC)) {
+				magic = 1;
+				needlen += 2;
+			} else
+				needlen += 1;		/* "c" */
+			break;
+		default:
+			needlen += 1;
+			break;
+		}
+
+	if (!magic) {
+		*replacedp = 0;
+		return (0);
+	}
+
+	/* Get enough memory to hold the final pattern. */
+	*replacedp = 1;
+	GET_SPACE_RET(sp, bp, blen, needlen);
+
+	/* Second pass: write the converted pattern. */
+	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
+		switch (*p) {
+		case '\\':
+			if (len > 1) {
+				--len;
+				switch (*++p) {
+				case '<':
+					memcpy(t,
+					    RE_WSTART, sizeof(RE_WSTART) - 1);
+					t += sizeof(RE_WSTART) - 1;
+					break;
+				case '>':
+					memcpy(t,
+					    RE_WSTOP, sizeof(RE_WSTOP) - 1);
+					t += sizeof(RE_WSTOP) - 1;
+					break;
+				case '~':
+					if (O_ISSET(sp, O_MAGIC))
+						*t++ = '~';
+					else {
+						memcpy(t,
+						    sp->repl, sp->repl_len);
+						t += sp->repl_len;
+					}
+					break;
+				case '.':
+				case '[':
+				case '*':
+					if (O_ISSET(sp, O_MAGIC))
+						*t++ = '\\';
+					*t++ = *p;
+					break;
+				default:
+					*t++ = '\\';
+					*t++ = *p;
+				}
+			} else
+				*t++ = '\\';
+			break;
+		case '~':
+			if (O_ISSET(sp, O_MAGIC)) {
+				memcpy(t, sp->repl, sp->repl_len);
+				t += sp->repl_len;
+			} else
+				*t++ = '~';
+			break;
+		case '.':
+		case '[':
+		case '*':
+			if (!O_ISSET(sp, O_MAGIC))
+				*t++ = '\\';
+			*t++ = *p;
+			break;
+		default:
+			*t++ = *p;
+			break;
+		}
+
+	*ptrnp = bp;
+	*plenp = t - bp;
+	return (0);
+}
+
+/*
+ * re_tag_conv --
+ *	Convert a tags search path into something that the POSIX
+ *	1003.2 RE functions can handle.
+ *
+ * On entry *ptrnp/*plenp describe the tags pattern.  On return
+ * *replacedp is 1 and *ptrnp/*plenp describe the converted pattern,
+ * held in GET_SPACE_RET memory.  Returns 0 on success.
+ */
+static int
+re_tag_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, len;
+	int lastdollar;
+	char *bp, *p, *t;
+
+	len = *plenp;
+
+	/* Max memory usage is 2 times the length of the string. */
+	*replacedp = 1;
+	GET_SPACE_RET(sp, bp, blen, len * 2);
+
+	p = *ptrnp;
+	t = bp;
+
+	/* If the last character is a '/' or '?', we just strip it. */
+	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
+		--len;
+
+	/* If the next-to-last or last character is a '$', it's magic. */
+	if (len > 0 && p[len - 1] == '$') {
+		--len;
+		lastdollar = 1;
+	} else
+		lastdollar = 0;
+
+	/* If the first character is a '/' or '?', we just strip it. */
+	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
+		++p;
+		--len;
+	}
+
+	/*
+	 * If the first or second character is a '^', it's magic.  Only
+	 * look at it if it's part of what is left of the pattern; len is
+	 * unsigned, decrementing it at 0 would wrap around.
+	 */
+	if (len > 0 && p[0] == '^') {
+		*t++ = *p++;
+		--len;
+	}
+
+	/*
+	 * Escape every other magic character we can find, meanwhile stripping
+	 * the backslashes ctags inserts when escaping the search delimiter
+	 * characters.
+	 *
+	 * A backslash is only stripped if the delimiter that follows it is
+	 * still inside the pattern (len > 1).  Otherwise the two decrements
+	 * for the pair would wrap len around and run off the end of both the
+	 * pattern and the buffer.
+	 */
+	for (; len > 0; --len) {
+		if (len > 1 &&
+		    p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
+			++p;
+			--len;
+		} else if (strchr("^.[]$*", p[0]))
+			*t++ = '\\';
+		*t++ = *p++;
+	}
+	if (lastdollar)
+		*t++ = '$';
+
+	*ptrnp = bp;
+	*plenp = t - bp;
+	return (0);
+}
+
+/*
+ * re_cscope_conv --
+ *	Turn a source line as printed by cscope into a POSIX 1003.2 RE
+ *	that matches the line.
+ *
+ * On return *replacedp is 1 and *ptrnp/*plenp describe the converted
+ * pattern, held in GET_SPACE_RET memory.  Returns 0 on success.
+ */
+static int
+re_cscope_conv(sp, ptrnp, plenp, replacedp)
+	SCR *sp;
+	char **ptrnp;
+	size_t *plenp;
+	int *replacedp;
+{
+	size_t blen, need, nblank, remain;
+	char *bp, *dst, *src;
+
+	/*
+	 * A space in the cscope output stands for any run of spaces,
+	 * tabs and comments in the real source line.
+	 */
+#define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
+	nblank = 0;
+	src = *ptrnp;
+	remain = *plenp;
+	while (remain > 0) {
+		if (*src == ' ')
+			++nblank;
+		++src;
+		--remain;
+	}
+
+	/*
+	 * Be generous with the buffer.  Room is needed for:
+	 *	every input character plus an escape in front of it;
+	 *	nblank + 2 copies of CSCOPE_RE_SPACE;
+	 *	the ^ and $ anchors and a nul terminator.
+	 */
+	*replacedp = 1;
+	need = (src - *ptrnp) * 2 +
+	    (nblank + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
+	GET_SPACE_RET(sp, bp, blen, need);
+
+	dst = bp;
+
+	/* Anchor at the start of the line, allowing leading blank stuff. */
+	*dst++ = '^';
+	memcpy(dst, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+	dst += sizeof(CSCOPE_RE_SPACE) - 1;
+
+	/* Spaces become the blank RE, RE operators get escaped. */
+	src = *ptrnp;
+	remain = *plenp;
+	while (remain > 0) {
+		if (*src != ' ') {
+			if (strchr("\\^.[]$*+?()|{}", *src))
+				*dst++ = '\\';
+			*dst++ = *src;
+		} else {
+			memcpy(dst,
+			    CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+			dst += sizeof(CSCOPE_RE_SPACE) - 1;
+		}
+		++src;
+		--remain;
+	}
+
+	/* Allow trailing blank stuff, then anchor at the end of the line. */
+	memcpy(dst, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
+	dst += sizeof(CSCOPE_RE_SPACE) - 1;
+	*dst++ = '$';
+
+	*ptrnp = bp;
+	*plenp = dst - bp;
+	return (0);
+}
+
+/*
+ * re_error --
+ *	Report a regular expression error.
+ *
+ *	The text for errcode/preg is obtained from regerror(3): a first call
+ *	with a zero-length buffer yields the size needed, a second call fills
+ *	a temporary heap buffer of that size.  The text is then queued with
+ *	msgq() as an M_ERR message and the buffer is freed.  If the buffer
+ *	can't be allocated, an M_SYSERR message is queued instead.
+ *
+ * PUBLIC: void re_error __P((SCR *, int, regex_t *));
+ */
+void
+re_error(sp, errcode, preg)
+	SCR *sp;
+	int errcode;
+	regex_t *preg;
+{
+	size_t need;
+	char *msg;
+
+	/* Size query only; nothing is stored with a zero-length buffer. */
+	need = regerror(errcode, preg, "", 0);
+
+	msg = malloc(need);
+	if (msg == NULL) {
+		msgq(sp, M_SYSERR, NULL);
+		return;
+	}
+
+	(void)regerror(errcode, preg, msg, need);
+	msgq(sp, M_ERR, "RE error: %s", msg);
+	free(msg);
+}
+
+/*
+ * re_sub --
+ * 	Do the substitution for a regular expression.
+ *
+ *	Expands the replacement text sp->repl (sp->repl_len bytes) for one
+ *	match and appends the result to the output line buffer, starting at
+ *	offset *lbclenp of *lbp.
+ *
+ *	sp:	 screen; supplies the replacement text, the O_MAGIC option
+ *		 and the sp->newl / sp->newl_cnt newline-offset list.
+ *	ip:	 input line that the offsets in match[] refer to.
+ *	lbp:	 output buffer pointer, written back on return.
+ *	lbclenp: number of bytes currently used in the output buffer,
+ *		 written back on return.
+ *	lblenp:	 written back on return from the local lblen (presumably
+ *		 the allocated size maintained by NEEDSP -- confirm).
+ *	match:	 regexec(3) results; [0] is the whole match, [1]..[9] the
+ *		 subexpressions.
+ *
+ *	The only explicit return is 0.  NEEDSP and NEEDNEWLINE are defined
+ *	elsewhere; NOTE(review): they look like they grow the buffers and may
+ *	return from this function on failure -- confirm.
+ */
+static int
+re_sub(sp, ip, lbp, lbclenp, lblenp, match)
+	SCR *sp;
+	char *ip;			/* Input line. */
+	char **lbp;
+	size_t *lbclenp, *lblenp;
+	regmatch_t match[10];
+{
+	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
+	size_t lbclen, lblen;		/* Local copies. */
+	size_t mlen;			/* Match length. */
+	size_t rpl;			/* Remaining replacement length. */
+	char *rp;			/* Replacement pointer. */
+	int ch;
+	int no;				/* Match replacement offset. */
+	char *p, *t;			/* Buffer pointers. */
+	char *lb;			/* Local copies. */
+
+	lb = *lbp;			/* Get local copies. */
+	lbclen = *lbclenp;
+	lblen = *lblenp;
+
+	/*
+	 * QUOTING NOTE:
+	 *
+	 * There are some special sequences that vi provides in the
+	 * replacement patterns.
+	 *	 & string the RE matched (\& if nomagic set)
+	 *	\# n-th regular subexpression
+	 *	\E end \U, \L conversion
+	 *	\e end \U, \L conversion
+	 *	\l convert the next character to lower-case
+	 *	\L convert to lower-case, until \E, \e, or end of replacement
+	 *	\u convert the next character to upper-case
+	 *	\U convert to upper-case, until \E, \e, or end of replacement
+	 *
+	 * Otherwise, since this is the lowest level of replacement, discard
+	 * all escaping characters.  This (hopefully) matches historic practice.
+	 */
+
+	/*
+	 * OUTCH --
+	 *	Append one character to the output buffer at p and bump lbclen.
+	 *
+	 * If nltrans is non-zero and the character's key value is K_CR or
+	 * K_NL, the current output offset is recorded in sp->newl (after
+	 * NEEDNEWLINE) and the character is stored unconverted.  Otherwise,
+	 * any pending case conversion is applied first; the one-shot forms
+	 * (C_ONELOWER, C_ONEUPPER) reset conv to C_NOTSET after a single
+	 * character.  The macro uses the locals sp, conv, p and lbclen
+	 * directly.
+	 */
+#define	OUTCH(ch, nltrans) {						\
+	CHAR_T __ch = (ch);						\
+	u_int __value = KEY_VAL(sp, __ch);				\
+	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
+		NEEDNEWLINE(sp);					\
+		sp->newl[sp->newl_cnt++] = lbclen;			\
+	} else if (conv != C_NOTSET) {					\
+		switch (conv) {						\
+		case C_ONELOWER:					\
+			conv = C_NOTSET;				\
+			/* FALLTHROUGH */				\
+		case C_LOWER:						\
+			if (isupper(__ch))				\
+				__ch = tolower(__ch);			\
+			break;						\
+		case C_ONEUPPER:					\
+			conv = C_NOTSET;				\
+			/* FALLTHROUGH */				\
+		case C_UPPER:						\
+			if (islower(__ch))				\
+				__ch = toupper(__ch);			\
+			break;						\
+		default:						\
+			abort();					\
+		}							\
+	}								\
+	NEEDSP(sp, 1, p);						\
+	*p++ = __ch;							\
+	++lbclen;							\
+}
+	/*
+	 * Walk the replacement text one character at a time.  Cases that
+	 * fully handle their input use "continue"; anything that leaves
+	 * either switch with "break" reaches the OUTCH at the bottom of the
+	 * loop and emits ch.
+	 */
+	conv = C_NOTSET;
+	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
+		switch (ch = *rp++) {
+		case '&':
+			/* A bare '&' is the whole match only if magic is set. */
+			if (O_ISSET(sp, O_MAGIC)) {
+				no = 0;
+				goto subzero;
+			}
+			break;
+		case '\\':
+			/* A trailing backslash is emitted as itself. */
+			if (rpl == 0)
+				break;
+			--rpl;
+			switch (ch = *rp) {
+			case '&':
+				/* "\&" is the whole match only if nomagic. */
+				++rp;
+				if (!O_ISSET(sp, O_MAGIC)) {
+					no = 0;
+					goto subzero;
+				}
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				no = *rp++ - '0';
+				/*
+				 * Also the target of the '&' jumps above.
+				 *
+				 * NOTE(review): if the selected match has no
+				 * offsets, the break leaves the switch and the
+				 * OUTCH below emits ch itself (the digit or
+				 * the '&') -- confirm this is intended.
+				 */
+subzero:			if (match[no].rm_so == -1 ||
+			    	    match[no].rm_eo == -1)
+					break;
+				/* Copy the matched text; no newline handling. */
+				mlen = match[no].rm_eo - match[no].rm_so;
+				for (t = ip + match[no].rm_so; mlen--; ++t)
+					OUTCH(*t, 0);
+				continue;
+			case 'e':
+			case 'E':
+				++rp;
+				conv = C_NOTSET;
+				continue;
+			case 'l':
+				++rp;
+				conv = C_ONELOWER;
+				continue;
+			case 'L':
+				++rp;
+				conv = C_LOWER;
+				continue;
+			case 'u':
+				++rp;
+				conv = C_ONEUPPER;
+				continue;
+			case 'U':
+				++rp;
+				conv = C_UPPER;
+				continue;
+			default:
+				/* Drop the backslash, emit the character. */
+				++rp;
+				break;
+			}
+		}
+		OUTCH(ch, 1);
+	}
+
+	*lbp = lb;			/* Update caller's information. */
+	*lbclenp = lbclen;
+	*lblenp = lblen;
+	return (0);
+}