diff options
Diffstat (limited to 'contrib/bc/src/bc_lex.c')
-rw-r--r-- | contrib/bc/src/bc_lex.c | 409 |
1 files changed, 409 insertions, 0 deletions
diff --git a/contrib/bc/src/bc_lex.c b/contrib/bc/src/bc_lex.c new file mode 100644 index 000000000000..87475385fe70 --- /dev/null +++ b/contrib/bc/src/bc_lex.c @@ -0,0 +1,409 @@ +/* + * ***************************************************************************** + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2018-2020 Gavin D. Howard and contributors. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * ***************************************************************************** + * + * The lexer for bc. + * + */ + +#if BC_ENABLED + +#include <assert.h> +#include <ctype.h> +#include <string.h> + +#include <bc.h> +#include <vm.h> + +static void bc_lex_identifier(BcLex *l) { + + size_t i; + const char *buf = l->buf + l->i - 1; + + for (i = 0; i < bc_lex_kws_len; ++i) { + + const BcLexKeyword *kw = bc_lex_kws + i; + size_t n = BC_LEX_KW_LEN(kw); + + if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') { + + l->t = BC_LEX_KW_AUTO + (BcLexType) i; + + if (!BC_LEX_KW_POSIX(kw)) + bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name); + + // We minus 1 because the index has already been incremented. + l->i += n - 1; + return; + } + } + + bc_lex_name(l); + + if (BC_ERR(l->str.len - 1 > 1)) + bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v); +} + +static void bc_lex_string(BcLex *l) { + + size_t len, nlines = 0, i = l->i; + const char *buf = l->buf; + char c; + + l->t = BC_LEX_STR; + + for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n'; + + if (BC_ERR(c == '\0')) { + l->i = i; + bc_lex_err(l, BC_ERR_PARSE_STRING); + } + + len = i - l->i; + bc_vec_string(&l->str, len, l->buf + l->i); + + l->i = i + 1; + l->line += nlines; +} + +static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) { + if (l->buf[l->i] == '=') { + l->i += 1; + l->t = with; + } + else l->t = without; +} + +void bc_lex_token(BcLex *l) { + + char c = l->buf[l->i++], c2; + + // This is the workhorse of the lexer. + switch (c) { + + case '\0': + case '\n': + case '\t': + case '\v': + case '\f': + case '\r': + case ' ': + { + bc_lex_commonTokens(l, c); + break; + } + + case '!': + { + bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT); + + if (l->t == BC_LEX_OP_BOOL_NOT) + bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!"); + + break; + } + + case '"': + { + bc_lex_string(l); + break; + } + + case '#': + { + bc_lex_err(l, BC_ERR_POSIX_COMMENT); + bc_lex_lineComment(l); + break; + } + + case '%': + { + bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS); + break; + } + + case '&': + { + c2 = l->buf[l->i]; + if (BC_NO_ERR(c2 == '&')) { + + bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&"); + + l->i += 1; + l->t = BC_LEX_OP_BOOL_AND; + } + else bc_lex_invalidChar(l, c); + + break; + } +#if BC_ENABLE_EXTRA_MATH + case '$': + { + l->t = BC_LEX_OP_TRUNC; + break; + } + + case '@': + { + bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES); + break; + } +#endif // BC_ENABLE_EXTRA_MATH + case '(': + case ')': + { + l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN); + break; + } + + case '*': + { + bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY); + break; + } + + case '+': + { + c2 = l->buf[l->i]; + if (c2 == '+') { + l->i += 1; + l->t = BC_LEX_OP_INC; + } + else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS); + break; + } + + case ',': + { + l->t = BC_LEX_COMMA; + break; + } + + case '-': + { + c2 = l->buf[l->i]; + if (c2 == '-') { + l->i += 1; + l->t = BC_LEX_OP_DEC; + } + else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS); + break; + } + + case '.': + { + c2 = l->buf[l->i]; + if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c); + else { + l->t = BC_LEX_KW_LAST; + bc_lex_err(l, BC_ERR_POSIX_DOT); + } + break; + } + + case '/': + { + c2 = l->buf[l->i]; + if (c2 =='*') bc_lex_comment(l); + else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE); + break; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + // Apparently, GNU bc (and maybe others) allows any uppercase letter as + // a number. When single digits, they act like the ones above. When + // multi-digit, any letter above the input base is automatically set to + // the biggest allowable digit in the input base. + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + { + bc_lex_number(l, c); + break; + } + + case ';': + { + l->t = BC_LEX_SCOLON; + break; + } + + case '<': + { +#if BC_ENABLE_EXTRA_MATH + c2 = l->buf[l->i]; + + if (c2 == '<') { + l->i += 1; + bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT); + break; + } +#endif // BC_ENABLE_EXTRA_MATH + bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT); + break; + } + + case '=': + { + bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN); + break; + } + + case '>': + { +#if BC_ENABLE_EXTRA_MATH + c2 = l->buf[l->i]; + + if (c2 == '>') { + l->i += 1; + bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT); + break; + } +#endif // BC_ENABLE_EXTRA_MATH + bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT); + break; + } + + case '[': + case ']': + { + l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET); + break; + } + + case '\\': + { + if (BC_NO_ERR(l->buf[l->i] == '\n')) { + l->i += 1; + l->t = BC_LEX_WHITESPACE; + } + else bc_lex_invalidChar(l, c); + break; + } + + case '^': + { + bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER); + break; + } + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + { + bc_lex_identifier(l); + break; + } + + case '{': + case '}': + { + l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE); + break; + } + + case '|': + { + c2 = l->buf[l->i]; + + if (BC_NO_ERR(c2 == '|')) { + + bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||"); + + l->i += 1; + l->t = BC_LEX_OP_BOOL_OR; + } + else bc_lex_invalidChar(l, c); + + break; + } + + default: + { + bc_lex_invalidChar(l, c); + } + } +} +#endif // BC_ENABLED |