diff options
Diffstat (limited to 'contrib/nvi/vi/v_word.c')
-rw-r--r-- | contrib/nvi/vi/v_word.c | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/contrib/nvi/vi/v_word.c b/contrib/nvi/vi/v_word.c new file mode 100644 index 000000000000..e6e4a63c57e7 --- /dev/null +++ b/contrib/nvi/vi/v_word.c @@ -0,0 +1,547 @@ +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1992, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + * + * See the LICENSE file for redistribution information. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <bitstring.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> + +#include "../common/common.h" +#include "vi.h" + +/* + * There are two types of "words". Bigwords are easy -- groups of anything + * delimited by whitespace. Normal words are trickier. They are either a + * group of characters, numbers and underscores, or a group of anything but, + * delimited by whitespace. When for a word, if you're in whitespace, it's + * easy, just remove the whitespace and go to the beginning or end of the + * word. Otherwise, figure out if the next character is in a different group. + * If it is, go to the beginning or end of that group, otherwise, go to the + * beginning or end of the current group. The historic version of vi didn't + * get this right, so, for example, there were cases where "4e" was not the + * same as "eeee" -- in particular, single character words, and commands that + * began in whitespace were almost always handled incorrectly. To get it right + * you have to resolve the cursor after each search so that the look-ahead to + * figure out what type of "word" the cursor is in will be correct. + * + * Empty lines, and lines that consist of only white-space characters count + * as a single word, and the beginning and end of the file counts as an + * infinite number of words. + * + * Movements associated with commands are different than movement commands. + * For example, in "abc def", with the cursor on the 'a', "cw" is from + * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white + * space is discarded from the change movement. Another example is that, + * in the same string, a "cw" on any white space character replaces that + * single character, and nothing else. Ain't nothin' in here that's easy. + * + * One historic note -- in the original vi, the 'w', 'W' and 'B' commands + * would treat groups of empty lines as individual words, i.e. the command + * would move the cursor to each new empty line. The 'e' and 'E' commands + * would treat groups of empty lines as a single word, i.e. the first use + * would move past the group of lines. The 'b' command would just beep at + * you, or, if you did it from the start of the line as part of a motion + * command, go absolutely nuts. If the lines contained only white-space + * characters, the 'w' and 'W' commands would just beep at you, and the 'B', + * 'b', 'E' and 'e' commands would treat the group as a single word, and + * the 'B' and 'b' commands will treat the lines as individual words. This + * implementation treats all of these cases as a single white-space word. + */ + +enum which {BIGWORD, LITTLEWORD}; + +static int bword __P((SCR *, VICMD *, enum which)); +static int eword __P((SCR *, VICMD *, enum which)); +static int fword __P((SCR *, VICMD *, enum which)); + +/* + * v_wordW -- [count]W + * Move forward a bigword at a time. + * + * PUBLIC: int v_wordW __P((SCR *, VICMD *)); + */ +int +v_wordW(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (fword(sp, vp, BIGWORD)); +} + +/* + * v_wordw -- [count]w + * Move forward a word at a time. + * + * PUBLIC: int v_wordw __P((SCR *, VICMD *)); + */ +int +v_wordw(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (fword(sp, vp, LITTLEWORD)); +} + +/* + * fword -- + * Move forward by words. + */ +static int +fword(sp, vp, type) + SCR *sp; + VICMD *vp; + enum which type; +{ + enum { INWORD, NOTWORD } state; + VCS cs; + u_long cnt; + + cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; + cs.cs_lno = vp->m_start.lno; + cs.cs_cno = vp->m_start.cno; + if (cs_init(sp, &cs)) + return (1); + + /* + * If in white-space: + * If the count is 1, and it's a change command, we're done. + * Else, move to the first non-white-space character, which + * counts as a single word move. If it's a motion command, + * don't move off the end of the line. + */ + if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { + if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { + if (ISCMD(vp->rkp, 'c')) + return (0); + if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { + if (cs_fspace(sp, &cs)) + return (1); + goto ret; + } + } + if (cs_fblank(sp, &cs)) + return (1); + --cnt; + } + + /* + * Cyclically move to the next word -- this involves skipping + * over word characters and then any trailing non-word characters. + * Note, for the 'w' command, the definition of a word keeps + * switching. + */ + if (type == BIGWORD) + while (cnt--) { + for (;;) { + if (cs_next(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + } + /* + * If a motion command and we're at the end of the + * last word, we're done. Delete and yank eat any + * trailing blanks, but we don't move off the end + * of the line regardless. + */ + if (cnt == 0 && ISMOTION(vp)) { + if ((ISCMD(vp->rkp, 'd') || + ISCMD(vp->rkp, 'y')) && + cs_fspace(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs_fblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + } + else + while (cnt--) { + state = cs.cs_flags == 0 && + inword(cs.cs_ch) ? INWORD : NOTWORD; + for (;;) { + if (cs_next(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + if (state == INWORD) { + if (!inword(cs.cs_ch)) + break; + } else + if (inword(cs.cs_ch)) + break; + } + /* See comment above. */ + if (cnt == 0 && ISMOTION(vp)) { + if ((ISCMD(vp->rkp, 'd') || + ISCMD(vp->rkp, 'y')) && + cs_fspace(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + if (cs_fblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + } + + /* + * If we didn't move, we must be at EOF. + * + * !!! + * That's okay for motion commands, however. + */ +ret: if (!ISMOTION(vp) && + cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { + v_eof(sp, &vp->m_start); + return (1); + } + + /* Adjust the end of the range for motion commands. */ + vp->m_stop.lno = cs.cs_lno; + vp->m_stop.cno = cs.cs_cno; + if (ISMOTION(vp) && cs.cs_flags == 0) + --vp->m_stop.cno; + + /* + * Non-motion commands move to the end of the range. Delete + * and yank stay at the start, ignore others. + */ + vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; + return (0); +} + +/* + * v_wordE -- [count]E + * Move forward to the end of the bigword. + * + * PUBLIC: int v_wordE __P((SCR *, VICMD *)); + */ +int +v_wordE(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (eword(sp, vp, BIGWORD)); +} + +/* + * v_worde -- [count]e + * Move forward to the end of the word. + * + * PUBLIC: int v_worde __P((SCR *, VICMD *)); + */ +int +v_worde(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (eword(sp, vp, LITTLEWORD)); +} + +/* + * eword -- + * Move forward to the end of the word. + */ +static int +eword(sp, vp, type) + SCR *sp; + VICMD *vp; + enum which type; +{ + enum { INWORD, NOTWORD } state; + VCS cs; + u_long cnt; + + cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; + cs.cs_lno = vp->m_start.lno; + cs.cs_cno = vp->m_start.cno; + if (cs_init(sp, &cs)) + return (1); + + /* + * !!! + * If in whitespace, or the next character is whitespace, move past + * it. (This doesn't count as a word move.) Stay at the character + * past the current one, it sets word "state" for the 'e' command. + */ + if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { + if (cs_next(sp, &cs)) + return (1); + if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) + goto start; + } + if (cs_fblank(sp, &cs)) + return (1); + + /* + * Cyclically move to the next word -- this involves skipping + * over word characters and then any trailing non-word characters. + * Note, for the 'e' command, the definition of a word keeps + * switching. + */ +start: if (type == BIGWORD) + while (cnt--) { + for (;;) { + if (cs_next(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + } + /* + * When we reach the start of the word after the last + * word, we're done. If we changed state, back up one + * to the end of the previous word. + */ + if (cnt == 0) { + if (cs.cs_flags == 0 && cs_prev(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs_fblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + } + else + while (cnt--) { + state = cs.cs_flags == 0 && + inword(cs.cs_ch) ? INWORD : NOTWORD; + for (;;) { + if (cs_next(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + if (state == INWORD) { + if (!inword(cs.cs_ch)) + break; + } else + if (inword(cs.cs_ch)) + break; + } + /* See comment above. */ + if (cnt == 0) { + if (cs.cs_flags == 0 && cs_prev(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + if (cs_fblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_EOF) + goto ret; + } + + /* + * If we didn't move, we must be at EOF. + * + * !!! + * That's okay for motion commands, however. + */ +ret: if (!ISMOTION(vp) && + cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { + v_eof(sp, &vp->m_start); + return (1); + } + + /* Set the end of the range for motion commands. */ + vp->m_stop.lno = cs.cs_lno; + vp->m_stop.cno = cs.cs_cno; + + /* + * Non-motion commands move to the end of the range. + * Delete and yank stay at the start, ignore others. + */ + vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; + return (0); +} + +/* + * v_WordB -- [count]B + * Move backward a bigword at a time. + * + * PUBLIC: int v_wordB __P((SCR *, VICMD *)); + */ +int +v_wordB(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (bword(sp, vp, BIGWORD)); +} + +/* + * v_wordb -- [count]b + * Move backward a word at a time. + * + * PUBLIC: int v_wordb __P((SCR *, VICMD *)); + */ +int +v_wordb(sp, vp) + SCR *sp; + VICMD *vp; +{ + return (bword(sp, vp, LITTLEWORD)); +} + +/* + * bword -- + * Move backward by words. + */ +static int +bword(sp, vp, type) + SCR *sp; + VICMD *vp; + enum which type; +{ + enum { INWORD, NOTWORD } state; + VCS cs; + u_long cnt; + + cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; + cs.cs_lno = vp->m_start.lno; + cs.cs_cno = vp->m_start.cno; + if (cs_init(sp, &cs)) + return (1); + + /* + * !!! + * If in whitespace, or the previous character is whitespace, move + * past it. (This doesn't count as a word move.) Stay at the + * character before the current one, it sets word "state" for the + * 'b' command. + */ + if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { + if (cs_prev(sp, &cs)) + return (1); + if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) + goto start; + } + if (cs_bblank(sp, &cs)) + return (1); + + /* + * Cyclically move to the beginning of the previous word -- this + * involves skipping over word characters and then any trailing + * non-word characters. Note, for the 'b' command, the definition + * of a word keeps switching. + */ +start: if (type == BIGWORD) + while (cnt--) { + for (;;) { + if (cs_prev(sp, &cs)) + return (1); + if (cs.cs_flags == CS_SOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + } + /* + * When we reach the end of the word before the last + * word, we're done. If we changed state, move forward + * one to the end of the next word. + */ + if (cnt == 0) { + if (cs.cs_flags == 0 && cs_next(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs_bblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_SOF) + goto ret; + } + else + while (cnt--) { + state = cs.cs_flags == 0 && + inword(cs.cs_ch) ? INWORD : NOTWORD; + for (;;) { + if (cs_prev(sp, &cs)) + return (1); + if (cs.cs_flags == CS_SOF) + goto ret; + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + break; + if (state == INWORD) { + if (!inword(cs.cs_ch)) + break; + } else + if (inword(cs.cs_ch)) + break; + } + /* See comment above. */ + if (cnt == 0) { + if (cs.cs_flags == 0 && cs_next(sp, &cs)) + return (1); + break; + } + + /* Eat whitespace characters. */ + if (cs.cs_flags != 0 || isblank(cs.cs_ch)) + if (cs_bblank(sp, &cs)) + return (1); + if (cs.cs_flags == CS_SOF) + goto ret; + } + + /* If we didn't move, we must be at SOF. */ +ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { + v_sof(sp, &vp->m_start); + return (1); + } + + /* Set the end of the range for motion commands. */ + vp->m_stop.lno = cs.cs_lno; + vp->m_stop.cno = cs.cs_cno; + + /* + * All commands move to the end of the range. Motion commands + * adjust the starting point to the character before the current + * one. + * + * !!! + * The historic vi didn't get this right -- the `yb' command yanked + * the right stuff and even updated the cursor value, but the cursor + * was not actually updated on the screen. + */ + vp->m_final = vp->m_stop; + if (ISMOTION(vp)) + --vp->m_start.cno; + return (0); +} |