diff options
Diffstat (limited to 'contrib/bc/src/lex.c')
-rw-r--r-- | contrib/bc/src/lex.c | 281 |
1 files changed, 227 insertions, 54 deletions
diff --git a/contrib/bc/src/lex.c b/contrib/bc/src/lex.c index a229f1964d5c..7dc1fb03bd3a 100644 --- a/contrib/bc/src/lex.c +++ b/contrib/bc/src/lex.c @@ -3,7 +3,7 @@ * * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2018-2021 Gavin D. Howard and contributors. + * Copyright (c) 2018-2023 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,82 +42,154 @@ #include <vm.h> #include <bc.h> -void bc_lex_invalidChar(BcLex *l, char c) { +void +bc_lex_invalidChar(BcLex* l, char c) +{ l->t = BC_LEX_INVALID; bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); } -void bc_lex_lineComment(BcLex *l) { +void +bc_lex_lineComment(BcLex* l) +{ l->t = BC_LEX_WHITESPACE; - while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1; + while (l->i < l->len && l->buf[l->i] != '\n') + { + l->i += 1; + } } -void bc_lex_comment(BcLex *l) { - +void +bc_lex_comment(BcLex* l) +{ size_t i, nlines = 0; - const char *buf = l->buf; - bool end = false; + const char* buf; + bool end = false, got_more; char c; l->i += 1; l->t = BC_LEX_WHITESPACE; - for (i = l->i; !end; i += !end) { + // This loop is complex because it might need to request more data from + // stdin if the comment is not ended. This loop is taken until the comment + // is finished or we have EOF. + do + { + buf = l->buf; + got_more = false; + + // If we are in stdin mode, the buffer must be the one used for stdin. + assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v); + + // Find the end of the comment. + for (i = l->i; !end; i += !end) + { + // While we don't have an asterisk, eat, but increment nlines. + for (; (c = buf[i]) && c != '*'; ++i) + { + nlines += (c == '\n'); + } + + // If this is true, we need to request more data. + if (BC_ERR(!c || buf[i + 1] == '\0')) + { + // Read more, if possible. + if (!vm->eof && l->mode != BC_MODE_FILE) + { + got_more = bc_lex_readLine(l); + } - for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n'); + break; + } - if (BC_ERR(!c || buf[i + 1] == '\0')) { - l->i = i; - bc_lex_err(l, BC_ERR_PARSE_COMMENT); + // If this turns true, we found the end. Yay! + end = (buf[i + 1] == '/'); } + } + while (got_more && !end); - end = buf[i + 1] == '/'; + // If we didn't find the end, barf. + if (!end) + { + l->i = i; + bc_lex_err(l, BC_ERR_PARSE_COMMENT); } l->i = i + 2; l->line += nlines; } -void bc_lex_whitespace(BcLex *l) { +void +bc_lex_whitespace(BcLex* l) +{ char c; + l->t = BC_LEX_WHITESPACE; - for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]); + + // Eat. We don't eat newlines because they can be special. + for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]) + { + continue; + } } -void bc_lex_commonTokens(BcLex *l, char c) { +void +bc_lex_commonTokens(BcLex* l, char c) +{ if (!c) l->t = BC_LEX_EOF; else if (c == '\n') l->t = BC_LEX_NLINE; else bc_lex_whitespace(l); } -static size_t bc_lex_num(BcLex *l, char start, bool int_only) { - - const char *buf = l->buf + l->i; +/** + * Parses a number. + * @param l The lexer. + * @param start The start character. + * @param int_only Whether this function should only look for an integer. This + * is used to implement the exponent of scientific notation. + */ +static size_t +bc_lex_num(BcLex* l, char start, bool int_only) +{ + const char* buf = l->buf + l->i; size_t i; char c; bool last_pt, pt = (start == '.'); + // This loop looks complex. It is not. It is asking if the character is not + // a nul byte and it if it a valid num character based on what we have found + // thus far, or whether it is a backslash followed by a newline. I can do + // i+1 on the buffer because the buffer must have a nul byte. for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) || - (c == '\\' && buf[i + 1] == '\n')); ++i) + (c == '\\' && buf[i + 1] == '\n')); + ++i) { - if (c == '\\') { - - if (buf[i + 1] == '\n') { - - i += 2; - - // Make sure to eat whitespace at the beginning of the line. - while(isspace(buf[i]) && buf[i] != '\n') i += 1; + // I don't need to test that the next character is a newline because + // the loop condition above ensures that. + if (c == '\\') + { + i += 2; + + // Make sure to eat whitespace at the beginning of the line. + while (isspace(buf[i]) && buf[i] != '\n') + { + i += 1; + } - c = buf[i]; + c = buf[i]; - if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break; - } - else break; + // If the next character is not a number character, bail. + if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break; } + // Did we find the radix point? last_pt = (c == '.'); + + // If we did, and we already have one, then break because it's not part + // of this number. if (pt && last_pt) break; + + // Set whether we have found a radix point. pt = pt || last_pt; bc_vec_push(&l->str, &c); @@ -126,38 +198,50 @@ static size_t bc_lex_num(BcLex *l, char start, bool int_only) { return i; } -void bc_lex_number(BcLex *l, char start) { - +void +bc_lex_number(BcLex* l, char start) +{ l->t = BC_LEX_NUMBER; + // Make sure the string is clear. bc_vec_popAll(&l->str); bc_vec_push(&l->str, &start); + // Parse the number. l->i += bc_lex_num(l, start, false); #if BC_ENABLE_EXTRA_MATH { char c = l->buf[l->i]; - if (c == 'e') { - + // Do we have a number in scientific notation? + if (c == 'e') + { #if BC_ENABLED + // Barf for POSIX. if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM); #endif // BC_ENABLED + // Push the e. bc_vec_push(&l->str, &c); l->i += 1; c = l->buf[l->i]; - if (c == BC_LEX_NEG_CHAR) { + // Check for negative specifically because bc_lex_num() does not. + if (c == BC_LEX_NEG_CHAR) + { bc_vec_push(&l->str, &c); l->i += 1; c = l->buf[l->i]; } + // We must have a number character, so barf if not. if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true))) + { bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); + } + // Parse the exponent. l->i += bc_lex_num(l, 0, true); } } @@ -166,65 +250,154 @@ void bc_lex_number(BcLex *l, char start) { bc_vec_pushByte(&l->str, '\0'); } -void bc_lex_name(BcLex *l) { - +void +bc_lex_name(BcLex* l) +{ size_t i = 0; - const char *buf = l->buf + l->i - 1; + const char* buf = l->buf + l->i - 1; char c = buf[i]; l->t = BC_LEX_NAME; - while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i]; + // Should be obvious. It's looking for valid characters. + while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') + { + c = buf[++i]; + } + // Set the string to the identifier. bc_vec_string(&l->str, i, buf); // Increment the index. We minus 1 because it has already been incremented. l->i += i - 1; } -void bc_lex_init(BcLex *l) { +void +bc_lex_init(BcLex* l) +{ BC_SIG_ASSERT_LOCKED; assert(l != NULL); - bc_vec_init(&l->str, sizeof(char), NULL); + bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE); } -void bc_lex_free(BcLex *l) { +void +bc_lex_free(BcLex* l) +{ BC_SIG_ASSERT_LOCKED; assert(l != NULL); bc_vec_free(&l->str); } -void bc_lex_file(BcLex *l, const char *file) { +void +bc_lex_file(BcLex* l, const char* file) +{ assert(l != NULL && file != NULL); l->line = 1; - vm.file = file; + vm->file = file; } -void bc_lex_next(BcLex *l) { +void +bc_lex_next(BcLex* l) +{ + BC_SIG_ASSERT_LOCKED; assert(l != NULL); l->last = l->t; + + // If this wasn't here, the line number would be off. l->line += (l->i != 0 && l->buf[l->i - 1] == '\n'); + // If the last token was EOF, someone called this one too many times. if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF); l->t = BC_LEX_EOF; + // We are done if this is true. if (l->i == l->len) return; // Loop until failure or we don't have whitespace. This // is so the parser doesn't get inundated with whitespace. - do { - vm.next(l); - } while (l->t == BC_LEX_WHITESPACE); + do + { + vm->next(l); + } + while (l->t == BC_LEX_WHITESPACE); } -void bc_lex_text(BcLex *l, const char *text) { - assert(l != NULL && text != NULL); +/** + * Updates the buffer and len so that they are not invalidated when the stdin + * buffer grows. + * @param l The lexer. + * @param text The text. + * @param len The length of the text. + */ +static void +bc_lex_fixText(BcLex* l, const char* text, size_t len) +{ l->buf = text; + l->len = len; +} + +bool +bc_lex_readLine(BcLex* l) +{ + bool good; + + // These are reversed because they should be already locked, but + // bc_vm_readLine() needs them to be unlocked. + BC_SIG_UNLOCK; + + // Make sure we read from the appropriate place. + switch (l->mode) + { + case BC_MODE_EXPRS: + { + good = bc_vm_readBuf(false); + break; + } + + case BC_MODE_FILE: + { + good = false; + break; + } + + case BC_MODE_STDIN: + { + good = bc_vm_readLine(false); + break; + } + +#ifdef __GNUC__ +#ifndef __clang__ + default: + { + // We should never get here. + abort(); + } +#endif // __clang__ +#endif // __GNUC__ + } + + BC_SIG_LOCK; + + bc_lex_fixText(l, vm->buffer.v, vm->buffer.len - 1); + + return good; +} + +void +bc_lex_text(BcLex* l, const char* text, BcMode mode) +{ + BC_SIG_ASSERT_LOCKED; + + assert(l != NULL && text != NULL); + + bc_lex_fixText(l, text, strlen(text)); l->i = 0; - l->len = strlen(text); l->t = l->last = BC_LEX_INVALID; + l->mode = mode; + bc_lex_next(l); } |