1 files changed, 227 insertions, 54 deletions
diff --git a/contrib/bc/src/lex.c b/contrib/bc/src/lex.c
index a229f1964d5c..7dc1fb03bd3a 100644
--- a/contrib/bc/src/lex.c
+++ b/contrib/bc/src/lex.c
@@ -3,7 +3,7 @@
  *
  * SPDX-License-Identifier: BSD-2-Clause
  *
- * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
+ * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,82 +42,154 @@
 #include <vm.h>
 #include <bc.h>
 
-void bc_lex_invalidChar(BcLex *l, char c) {
+void
+bc_lex_invalidChar(BcLex* l, char c)
+{
 	l->t = BC_LEX_INVALID;
 	bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
 }
 
-void bc_lex_lineComment(BcLex *l) {
+void
+bc_lex_lineComment(BcLex* l)
+{
 	l->t = BC_LEX_WHITESPACE;
-	while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
+	while (l->i < l->len && l->buf[l->i] != '\n')
+	{
+		l->i += 1;
+	}
 }
 
-void bc_lex_comment(BcLex *l) {
-
+void
+bc_lex_comment(BcLex* l)
+{
 	size_t i, nlines = 0;
-	const char *buf = l->buf;
-	bool end = false;
+	const char* buf;
+	bool end = false, got_more;
 	char c;
 
 	l->i += 1;
 	l->t = BC_LEX_WHITESPACE;
 
-	for (i = l->i; !end; i += !end) {
+	// This loop is complex because it might need to request more data from
+	// stdin if the comment is not ended. This loop is taken until the comment
+	// is finished or we have EOF.
+	do
+	{
+		buf = l->buf;
+		got_more = false;
+
+		// If we are in stdin mode, the buffer must be the one used for stdin.
+		assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v);
+
+		// Find the end of the comment.
+		for (i = l->i; !end; i += !end)
+		{
+			// While we don't have an asterisk, eat, but increment nlines.
+			for (; (c = buf[i]) && c != '*'; ++i)
+			{
+				nlines += (c == '\n');
+			}
+
+			// If this is true, we need to request more data.
+			if (BC_ERR(!c || buf[i + 1] == '\0'))
+			{
+				// Read more, if possible.
+				if (!vm->eof && l->mode != BC_MODE_FILE)
+				{
+					got_more = bc_lex_readLine(l);
+				}
 
-		for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
+				break;
+			}
 
-		if (BC_ERR(!c || buf[i + 1] == '\0')) {
-			l->i = i;
-			bc_lex_err(l, BC_ERR_PARSE_COMMENT);
+			// If this turns true, we found the end. Yay!
+			end = (buf[i + 1] == '/');
 		}
+	}
+	while (got_more && !end);
 
-		end = buf[i + 1] == '/';
+	// If we didn't find the end, barf.
+	if (!end)
+	{
+		l->i = i;
+		bc_lex_err(l, BC_ERR_PARSE_COMMENT);
 	}
 
 	l->i = i + 2;
 	l->line += nlines;
 }
 
-void bc_lex_whitespace(BcLex *l) {
+void
+bc_lex_whitespace(BcLex* l)
+{
 	char c;
+
 	l->t = BC_LEX_WHITESPACE;
-	for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
+
+	// Eat. We don't eat newlines because they can be special.
+	for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i])
+	{
+		continue;
+	}
 }
 
-void bc_lex_commonTokens(BcLex *l, char c) {
+void
+bc_lex_commonTokens(BcLex* l, char c)
+{
 	if (!c) l->t = BC_LEX_EOF;
 	else if (c == '\n') l->t = BC_LEX_NLINE;
 	else bc_lex_whitespace(l);
 }
 
-static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
-
-	const char *buf = l->buf + l->i;
+/**
+ * Parses a number.
+ * @param l         The lexer.
+ * @param start     The start character.
+ * @param int_only  Whether this function should only look for an integer. This
+ *                  is used to implement the exponent of scientific notation.
+ */
+static size_t
+bc_lex_num(BcLex* l, char start, bool int_only)
+{
+	const char* buf = l->buf + l->i;
 	size_t i;
 	char c;
 	bool last_pt, pt = (start == '.');
 
+	// This loop looks complex. It is not. It is asking if the character is not
+	// a nul byte and if it is a valid num character based on what we have found
+	// thus far, or whether it is a backslash followed by a newline. I can do
+	// i+1 on the buffer because the buffer must have a nul byte.
 	for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
-	                             (c == '\\' && buf[i + 1] == '\n')); ++i)
+	                             (c == '\\' && buf[i + 1] == '\n'));
+	     ++i)
 	{
-		if (c == '\\') {
-
-			if (buf[i + 1] == '\n') {
-
-				i += 2;
-
-				// Make sure to eat whitespace at the beginning of the line.
-				while(isspace(buf[i]) && buf[i] != '\n') i += 1;
+		// I don't need to test that the next character is a newline because
+		// the loop condition above ensures that.
+		if (c == '\\')
+		{
+			i += 2;
+
+			// Make sure to eat whitespace at the beginning of the line.
+			while (isspace(buf[i]) && buf[i] != '\n')
+			{
+				i += 1;
+			}
 
-				c = buf[i];
+			c = buf[i];
 
-				if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
-			}
-			else break;
+			// If the next character is not a number character, bail.
+			if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
 		}
 
+		// Did we find the radix point?
 		last_pt = (c == '.');
+
+		// If we did, and we already have one, then break because it's not part
+		// of this number.
 		if (pt && last_pt) break;
+
+		// Set whether we have found a radix point.
 		pt = pt || last_pt;
 
 		bc_vec_push(&l->str, &c);
@@ -126,38 +198,50 @@ static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
 	return i;
 }
 
-void bc_lex_number(BcLex *l, char start) {
-
+void
+bc_lex_number(BcLex* l, char start)
+{
 	l->t = BC_LEX_NUMBER;
 
+	// Make sure the string is clear.
 	bc_vec_popAll(&l->str);
 	bc_vec_push(&l->str, &start);
 
+	// Parse the number.
 	l->i += bc_lex_num(l, start, false);
 
 #if BC_ENABLE_EXTRA_MATH
 	{
 		char c = l->buf[l->i];
 
-		if (c == 'e') {
-
+		// Do we have a number in scientific notation?
+		if (c == 'e')
+		{
 #if BC_ENABLED
+			// Barf for POSIX.
 			if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
 #endif // BC_ENABLED
 
+			// Push the e.
 			bc_vec_push(&l->str, &c);
 			l->i += 1;
 			c = l->buf[l->i];
 
-			if (c == BC_LEX_NEG_CHAR) {
+			// Check for negative specifically because bc_lex_num() does not.
+			if (c == BC_LEX_NEG_CHAR)
+			{
 				bc_vec_push(&l->str, &c);
 				l->i += 1;
 				c = l->buf[l->i];
 			}
 
+			// We must have a number character, so barf if not.
 			if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
+			{
 				bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
+			}
 
+			// Parse the exponent.
 			l->i += bc_lex_num(l, 0, true);
 		}
 	}
@@ -166,65 +250,154 @@ void bc_lex_number(BcLex *l, char start) {
 	bc_vec_pushByte(&l->str, '\0');
 }
 
-void bc_lex_name(BcLex *l) {
-
+void
+bc_lex_name(BcLex* l)
+{
 	size_t i = 0;
-	const char *buf = l->buf + l->i - 1;
+	const char* buf = l->buf + l->i - 1;
 	char c = buf[i];
 
 	l->t = BC_LEX_NAME;
 
-	while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
+	// Should be obvious. It's looking for valid characters.
+	while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_')
+	{
+		c = buf[++i];
+	}
 
+	// Set the string to the identifier.
 	bc_vec_string(&l->str, i, buf);
 
 	// Increment the index. We minus 1 because it has already been incremented.
 	l->i += i - 1;
 }
 
-void bc_lex_init(BcLex *l) {
+void
+bc_lex_init(BcLex* l)
+{
 	BC_SIG_ASSERT_LOCKED;
 	assert(l != NULL);
-	bc_vec_init(&l->str, sizeof(char), NULL);
+	bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE);
 }
 
-void bc_lex_free(BcLex *l) {
+void
+bc_lex_free(BcLex* l)
+{
 	BC_SIG_ASSERT_LOCKED;
 	assert(l != NULL);
 	bc_vec_free(&l->str);
 }
 
-void bc_lex_file(BcLex *l, const char *file) {
+void
+bc_lex_file(BcLex* l, const char* file)
+{
 	assert(l != NULL && file != NULL);
 	l->line = 1;
-	vm.file = file;
+	vm->file = file;
 }
 
-void bc_lex_next(BcLex *l) {
+void
+bc_lex_next(BcLex* l)
+{
+	BC_SIG_ASSERT_LOCKED;
 
 	assert(l != NULL);
 
 	l->last = l->t;
+
+	// If this wasn't here, the line number would be off.
 	l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
 
+	// If the last token was EOF, someone called this one too many times.
 	if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
 
 	l->t = BC_LEX_EOF;
 
+	// We are done if this is true.
 	if (l->i == l->len) return;
 
 	// Loop until failure or we don't have whitespace. This
 	// is so the parser doesn't get inundated with whitespace.
-	do {
-		vm.next(l);
-	} while (l->t == BC_LEX_WHITESPACE);
+	do
+	{
+		vm->next(l);
+	}
+	while (l->t == BC_LEX_WHITESPACE);
 }
 
-void bc_lex_text(BcLex *l, const char *text) {
-	assert(l != NULL && text != NULL);
+/**
+ * Updates the buffer and len so that they are not invalidated when the stdin
+ * buffer grows.
+ * @param l     The lexer.
+ * @param text  The text.
+ * @param len   The length of the text.
+ */
+static void
+bc_lex_fixText(BcLex* l, const char* text, size_t len)
+{
 	l->buf = text;
+	l->len = len;
+}
+
+bool
+bc_lex_readLine(BcLex* l)
+{
+	bool good;
+
+	// These are reversed because they should be already locked, but
+	// bc_vm_readLine() needs them to be unlocked.
+	BC_SIG_UNLOCK;
+
+	// Make sure we read from the appropriate place.
+	switch (l->mode)
+	{
+		case BC_MODE_EXPRS:
+		{
+			good = bc_vm_readBuf(false);
+			break;
+		}
+
+		case BC_MODE_FILE:
+		{
+			good = false;
+			break;
+		}
+
+		case BC_MODE_STDIN:
+		{
+			good = bc_vm_readLine(false);
+			break;
+		}
+
+#ifdef __GNUC__
+#ifndef __clang__
+		default:
+		{
+			// We should never get here.
+			abort();
+		}
+#endif // __clang__
+#endif // __GNUC__
+	}
+
+	BC_SIG_LOCK;
+
+	bc_lex_fixText(l, vm->buffer.v, vm->buffer.len - 1);
+
+	return good;
+}
+
+void
+bc_lex_text(BcLex* l, const char* text, BcMode mode)
+{
+	BC_SIG_ASSERT_LOCKED;
+
+	assert(l != NULL && text != NULL);
+
+	bc_lex_fixText(l, text, strlen(text));
 	l->i = 0;
-	l->len = strlen(text);
 	l->t = l->last = BC_LEX_INVALID;
+	l->mode = mode;
+
 	bc_lex_next(l);
 }