aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWarner Losh <imp@FreeBSD.org>2019-06-02 04:23:56 +0000
committerWarner Losh <imp@FreeBSD.org>2019-06-02 04:23:56 +0000
commit03ee4d05f1d963d60451e04ce505e4da116300db (patch)
tree9dcabc6cffafcb6b8195148feb519d575b0bf6ac
parent3a4488f93f2dc8fe9de757a418b74aa0aa4f9ed1 (diff)
downloadsrc-vendor/one-true-awk.tar.gz
src-vendor/one-true-awk.zip
Import latest one-true-awk from upstreamvendor/one-true-awk/4189ef5dvendor/one-true-awk
Import git hash 4189ef5d from https://github.com/onetrueawk/awk.git as there's not been a release in a while. Upstream one-true-awk woke-up! Time to catch up. This may also revert FreeBSD changes that we'd placed in the vendor branch in anticipation of their inclusion in upstream. That's not yet the case, and these will be resolved in the merge. See FIXES for a complete list of bugs fixed (starting with the Jun 7, 2018 entry).
Notes
Notes: svn path=/vendor/one-true-awk/dist/; revision=348505 svn path=/vendor/one-true-awk/4189ef5d/; revision=348506; tag=vendor/one-true-awk/4189ef5d
-rw-r--r--ChangeLog245
-rw-r--r--FIXES109
-rw-r--r--LICENSE23
-rwxr-xr-xREGRESS35
-rw-r--r--awk.1160
-rw-r--r--awk.h10
-rw-r--r--awkgram.y4
-rw-r--r--b.c291
-rw-r--r--bugs-fixed/README57
-rw-r--r--bugs-fixed/a-format.awk3
-rw-r--r--bugs-fixed/a-format.bad3
-rw-r--r--bugs-fixed/a-format.ok1
-rw-r--r--bugs-fixed/concat-assign-same.awk4
-rw-r--r--bugs-fixed/concat-assign-same.bad2
-rw-r--r--bugs-fixed/concat-assign-same.ok2
-rw-r--r--bugs-fixed/decr-NF.awk11
-rw-r--r--bugs-fixed/decr-NF.bad5
-rw-r--r--bugs-fixed/decr-NF.ok5
-rw-r--r--bugs-fixed/fmt-overflow.awk1
-rw-r--r--bugs-fixed/fmt-overflow.ok1
-rw-r--r--bugs-fixed/fs-overflow.awk13
-rw-r--r--bugs-fixed/getline-numeric.awk6
-rw-r--r--bugs-fixed/getline-numeric.bad3
-rw-r--r--bugs-fixed/getline-numeric.in1
-rw-r--r--bugs-fixed/getline-numeric.ok3
-rw-r--r--bugs-fixed/missing-precision.awk1
-rw-r--r--bugs-fixed/missing-precision.ok2
-rw-r--r--bugs-fixed/negative-nf.awk1
-rw-r--r--bugs-fixed/negative-nf.ok2
-rw-r--r--bugs-fixed/nf-self-assign.awk6
-rw-r--r--bugs-fixed/nf-self-assign.bad1
-rw-r--r--bugs-fixed/nf-self-assign.ok1
-rw-r--r--bugs-fixed/numeric-fs.awk5
-rw-r--r--bugs-fixed/numeric-fs.ok3
-rw-r--r--bugs-fixed/numeric-output-seps.awk8
-rw-r--r--bugs-fixed/numeric-output-seps.bad2
-rw-r--r--bugs-fixed/numeric-output-seps.ok1
-rw-r--r--bugs-fixed/numeric-rs.awk6
-rw-r--r--bugs-fixed/numeric-rs.bad1
-rw-r--r--bugs-fixed/numeric-rs.ok4
-rw-r--r--bugs-fixed/numeric-subsep.awk5
-rw-r--r--bugs-fixed/numeric-subsep.bad1
-rw-r--r--bugs-fixed/numeric-subsep.ok1
-rw-r--r--bugs-fixed/ofs-rebuild.awk17
-rw-r--r--bugs-fixed/ofs-rebuild.bad1
-rw-r--r--bugs-fixed/ofs-rebuild.ok1
-rw-r--r--bugs-fixed/space.awk22
-rw-r--r--bugs-fixed/space.bad16
-rw-r--r--bugs-fixed/space.ok16
-rw-r--r--bugs-fixed/split-fs-from-array.awk5
-rw-r--r--bugs-fixed/split-fs-from-array.ok1
-rw-r--r--bugs-fixed/string-conv.awk13
-rw-r--r--bugs-fixed/string-conv.bad4
-rw-r--r--bugs-fixed/string-conv.ok4
-rw-r--r--bugs-fixed/subsep-overflow.awk24
-rw-r--r--bugs-fixed/subsep-overflow.ok5
-rw-r--r--bugs-fixed/system-status.awk19
-rw-r--r--bugs-fixed/system-status.bad3
-rw-r--r--bugs-fixed/system-status.ok3
-rw-r--r--bugs-fixed/unary-plus.awk4
-rw-r--r--bugs-fixed/unary-plus.bad2
-rw-r--r--bugs-fixed/unary-plus.ok2
-rw-r--r--lex.c32
-rw-r--r--lib.c36
-rw-r--r--main.c12
-rw-r--r--makefile53
-rw-r--r--maketab.c13
-rw-r--r--parse.c2
-rw-r--r--proctab.c209
-rw-r--r--proto.h3
-rw-r--r--run.c187
-rw-r--r--tran.c183
72 files changed, 1723 insertions, 218 deletions
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 000000000000..fd03b2bbca0b
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,245 @@
+2019-05-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * lib.c (isclvar): Remove check for additional '=' after
+ first one. No longer needed.
+
+2019-01-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (version): Updated.
+
+2019-01-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * run.c (awkgetline): Check for numeric value in all getline
+ variants. See the getline-numeric.* files in bugs-fixed directory.
+
+2018-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * REGRESS: Check for existence of a.out. If not there, run
+ make. Enable core dumps for T.arnold system status test
+ to work on MacOS X.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awktest.tar (testdir/T.expr): Fix test for unary plus.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * REGRESS: Extract tests if necessary, set PATH to include '.'.
+ * regdir/beebe.tar (Makefile): Fix longwrds test to prefix
+ sort with LC_ALL=C.
+ * awktest.tar: Updated from fixed test suite, directory
+ it extracts is now called 'testdir' to match what's in top-level
+ REGRESS script.
+ * regdir: Removed, as Brian wants to keep the test suite in
+ the tar file.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree.
+ * REGRESS: New file, from Brian.
+ * awktest.tar: Restored from Brian's tree.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (UPLUS): New token. In the grammar, call op1()
+ with it.
+ * maketab.c (proc): Add entry for UPLUS.
+ * run.c (arith): Handle UPLUS.
+ * main.c (version): Updated.
+ * bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad,
+ bugs-fixed/unary-plus.ok: New files.
+
+2018-08-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * TODO: Updated.
+ * awk.1: Improve use of macros, add some additional explanation
+ in a few places, alphabetize list of variables.
+
+2018-08-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h (Cell): Add new field `fmt' to track xFMT value used
+ for a string conversion.
+ [CONVC, CONVO]: New flag macros.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad,
+ bugs-fixed/string-conv.ok: New files.
+ * main.c (version): Updated.
+ * proto.h (flags2str): Add declaration.
+ * tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt
+ to NULL.
+ (setsval): Ditto. Add large comment and new code to manage
+ correct conversion of number to string based on various flags
+ and the value of vp->fmt. The idea is to not convert again
+ if xFMT is the same as before and we're doing the same conversion.
+ Otherwise, clear the old flags, set the new, and reconvert.
+ (flags2str): New function. For debug prints and for use from a debugger.
+
+2018-08-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix filename conflicts in regdir where the only difference was
+ in letter case. This caused problems on Windows systems.
+
+ * regdir/Compare.T1: Renamed from regdir/Compare.T.
+ * regdir/t.delete0: Renamed from regdir/t.delete.
+ * regdir/t.getline1: Renamed from regdir/t.getline.
+ * regdir/t.redir1: Renamed from regdir/t.redir.
+ * regdir/t.split1: Renamed from regdir/t.split.
+ * regdir/t.sub0: Renamed from regdir/t.sub.
+ * regdir/REGRESS: Adjusted.
+
+2018-08-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ With scalpel, tweezers, magnifying glass and bated breath,
+ borrow code from the NetBSD version of nawk to fix the years-old
+ bug whereby decrementing the value of NF did not change the
+ record.
+
+ * lib.c (fldbld): Set donerec to 1 when done.
+ (setlastfld): New function.
+ * proto.h (setlastfld): Add declaration.
+ * run.c (copycell): Make code smarter about flags (from NetBSD code).
+ * tran.c (setfree): New function.
+ * tran.c (setfval): Normalize negative zero to positive zero.
+ If setting NF, clear donerec and call setlastfld().
+ (setsval): Remove call to save_old_OFS(). If setting OFS, call
+ recbld(). If setting NF, clear donerec and call setlastfld().
+
+ As part of the process, revert OFS-related changes of 2018-05-22:
+
+ * awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations.
+ * lib.c (recbld): Use *OFS instead of saveOFS.
+ * run.c (saveOFS, saveOFSlen, save_old_OFS): Remove.
+ * tran.c (syminit): Remove initialization of saveOFS and saveOFSlen.
+
+ General stuff that goes along with all this:
+
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad,
+ bugs-fixed/decr-NF.ok: New files.
+ * main.c (version): Updated.
+ * regdir/README.TESTS: Fix awk book title.
+ * regdir/T.misc: Revise test to match fixed code.
+ * run.c (format): Increase size of buffer used for %a test. (Unrelated
+ to NF or OFS, but fixes a compiler complaint.)
+
+2018-06-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/beebe.tar: Fix longwrds.ok so that the test will pass.
+ The file was incorrectly sorted.
+
+2018-06-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/T.lilly: Fix the bug again in the second instance
+ of the code. Thanks to BWK for pointing this out.
+
+2018-05-31 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/T.lilly: Fix a syntax error and ordering bug
+ in creating the 'foo' file.
+
+2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.1: Remove standalone 'awk' at the top of file, it messed up
+ the formatting. Arrange built-in variable list in alphabetical
+ order.
+
+2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (version): Add my email address and a date so that
+ users can tell this isn't straight BWK awk.
+ * README.md: Minor updates.
+ * TODO: Updated.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Add POSIX-required formats %a and %A.
+
+ * run.c (format): Check for %a support in C library. If there,
+ allow %a and %A as valid formats.
+ * TODO: Updated.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/a-format.awk, bugs-fixed/a-format.bad,
+ bugs-fixed/a-format.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * FIXES: Restored a line from a much earlier version that
+ apparently got lost when the dates were reordered.
+ * TODO: Updated.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * README.md: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/echo.c, regdir/time.c: Minor fixes to compile without
+ warning on current GCC / Linux.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * TODO: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * makefile (gitadd, gitpush): Remove these targets. They
+ should not be automated and were incorrect for things that
+ would be done regularly.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix nawk so that [[:blank:]] only matches space and tab instead
+ of any whitespace character, originally made May 10, 2018.
+ See bugs-fixed/space.awk.
+
+ This appears to have been a thinko on Brian's part.
+
+ * b.c (charclasses): Use xisblank() function for [[:blank:]].
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/space.awk, bugs-fixed/space.bad,
+ bugs-fixed/space.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * .gitignore: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix nawk to provide reasonable exit status for system(),
+ a la gawk, originally made March 12, 2016. See
+ bugs-fixed/system-status.awk.
+
+ * run.c (bltin): For FSYSTEM, use the macros defined for wait(2)
+ to produce a reasonable exit value, instead of doing a floating-point
+ division by 256.
+ * awk.1: Document the return status values.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/system-status.awk, bugs-fixed/system-status.bad,
+ bugs-fixed/system-status.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Bug fix with respect to rebuilding a record, originally
+ made August 19, 2014. See bugs-fixed/ofs-rebuild.awk.
+
+ * awk.h (saveOFS, saveOFSlen): Declare new variables.
+ * lib.c (recbld): Use them when rebuilding the record.
+ * run.c (saveOFS, saveOFSlen): Define new variables.
+ (save_old_OFS): New function to save OFS aside.
+ * tran.c (syminit): Initialize saveOFS and saveOFSlen.
+ (setsval): If setting a field, call save_old_OFS().
+ * bugs-fixed/README, bugs-fixed/ofs-rebuild.awk,
+ bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * makefile (YACC): Use bison.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * ChangeLog: Created.
+ * regdir: Created. Based on contents of awktest.a.
+ * .gitattributes: Created, to preserve CR LF in regdir/t.crlf.
+ * awktest.a: Removed.
+ * regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar.
+ * awktest.tar: Removed.
diff --git a/FIXES b/FIXES
index c78aabc511f3..183eaedee47d 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,113 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+May 29, 2019:
+ Fix check for command line arguments to no longer require that
+ first character after '=' not be another '='. Reverts change of
+ August 11, 1989. Thanks to GitHub user Jamie Landeg Jones for
+ pointing out the issue; from Issue #38.
+
+Apr 7, 2019:
+ Update awktest.tar(p.50) to use modern options to sort. Needed
+ for Android development. Thanks to GitHub user mohd-akram (Mohamed
+ Akram). From Comment #33.
+
+Mar 12, 2019:
+ Added very simplistic support for cross-compiling in the
+ makefile. We are NOT going to go in the direction of the
+ autotools, though. Thanks to GitHub user nee-san for
+ the basic change. (Merged from PR #34.)
+
+Mar 5, 2019:
+ Added support for POSIX-standard interval expressions (a.k.a.
+ bounds, a.k.a. repetition expressions) in regular expressions,
+ backported (via NetBSD) from Apple awk-24 (20070501).
+ Thanks to Martijn Dekker <martijn@inlv.org> for the port.
+ (Merged from PR #30.)
+
+Mar 3, 2019:
+ Merge PRs as follows:
+ #12: Avoid undefined behaviour when using ctype(3) functions in
+ relex(). Thanks to GitHub user iamleot.
+ #31: Make getline handle numeric strings, and update FIXES. Thanks
+ to GitHub user arnoldrobbins.
+ #32: maketab: support build systems with read-only source. Thanks
+ to GitHub user enh.
+
+Jan 25, 2019:
+ Make getline handle numeric strings properly in all cases.
+ (Thanks, Arnold.)
+
+Jan 21, 2019:
+ Merged a number of small fixes from GitHub pull requests.
+ Thanks to GitHub users Arnold Robbins (arnoldrobbins),
+ Cody Mello (melloc) and Christoph Junghans (junghans).
+ PR numbers: 13-21, 23, 24, 27.
+
+Oct 25, 2018:
+ Added test in maketab.c to prevent generating a proctab entry
+ for YYSTYPE_IS_DEFINED. It was harmless but some gcc settings
+ generated a warning message. Thanks to Nan Xiao for report.
+
+Aug 27, 2018:
+ Disallow '$' in printf formats; arguments evaluated in order
+ and printed in order.
+
+ Added some casts to silence warnings on debugging printfs.
+ (Thanks, Arnold.)
+
+Aug 23, 2018:
+ A long list of fixes courtesy of Arnold Robbins,
+ to whom profound thanks.
+
+ 1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+ Fixed August 19, 2014. Revised fix August 2018.
+
+ 2. system-status: Instead of a floating-point division by 256, use
+ the wait(2) macros to create a reasonable exit status.
+ Fixed March 12, 2016.
+
+ 3. space: Use provided xisblank() function instead of isspace() for
+ matching [[:blank:]].
+
+ 4. a-format: Add POSIX standard %a and %A to supported formats. Check
+ at runtime that this format is available.
+
+ 5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+ bug. There are interactions with the old and new value of OFS as well.
+ Most of the fix came from the NetBSD awk.
+
+ 6. string-conv: String conversions of scalars were sticky. Once a
+ conversion to string happened, even with OFMT, that value was used until
+ a new numeric value was assigned, even if OFMT differed from CONVFMT,
+ and also if CONVFMT changed.
+
+ 7. unary-plus: Unary plus on a string constant returned the string.
+ Instead, it should convert the value to numeric and give that value.
+
+ Also added Arnold's tests for these to awktest.tar as T.arnold.
+
+Aug 15, 2018:
+ fixed mangled awktest.tar (thanks, Arnold), posted all
+ current (very minor) fixes to github / onetrueawk
+
+Jun 7, 2018:
+ (yes, a long layoff)
+ Updated some broken tests (beebe.tar, T.lilly)
+ [thanks to Arnold Robbins]
+
+Mar 26, 2015:
+ buffer overflow in error reporting; thanks to tobias ulmer
+ and john-mark gurney for spotting it and the fix.
+
+Feb 4, 2013:
+ cleaned up a handful of tests that didn't seem to actually
+ test for correct behavior: T.latin1, T.gawk.
+
+Jan 5, 2013:
+ added ,NULL initializer to static Cells in run.c; not really
+ needed but cleaner. Thanks to Michael Bombardieri.
+
Dec 20, 2012:
fiddled makefile to get correct yacc and bison flags. pick yacc
(linux) or bison (mac) as necessary.
@@ -493,6 +600,8 @@ May 12, 1998:
Mar 12, 1998:
added -V to print version number and die.
+[notify dave kerns, dkerns@dacsoup.ih.lucent.com]
+
Feb 11, 1998:
subtle silent bug in lex.c: if the program ended with a number
longer than 1 digit, part of the input would be pushed back and
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000000..07dfd7b73b11
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+/****************************************************************
+Copyright (C) Lucent Technologies 1997
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name Lucent Technologies or any of
+its entities not be used in advertising or publicity pertaining
+to distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+****************************************************************/
diff --git a/REGRESS b/REGRESS
new file mode 100755
index 000000000000..7d3ded69d536
--- /dev/null
+++ b/REGRESS
@@ -0,0 +1,35 @@
+#! /bin/sh
+
+case `uname` in
+CYGWIN) EXE=a.exe ;;
+*) EXE=a.out ;;
+esac
+
+if [ ! -f $EXE ]
+then
+ make || exit 1
+fi
+
+if [ -d testdir ]
+then
+ true # do nothing
+elif [ -f awktest.tar ]
+then
+ echo extracting testdir
+ tar -xpf awktest.tar
+else
+ echo $0: No testdir directory and no awktest.tar to extract it from! >&2
+ exit 1
+fi
+
+cd testdir
+pwd
+PATH=.:$PATH
+export PATH
+if (ulimit -c unlimited > /dev/null 2>&1)
+then
+ # Workaround broken default on MacOS X
+ ulimit -c unlimited
+fi
+
+REGRESS
diff --git a/awk.1 b/awk.1
index 6119613c1aae..18e99ad39496 100644
--- a/awk.1
+++ b/awk.1
@@ -7,7 +7,6 @@
.fi
.ft 1
..
-awk
.TH AWK 1
.CT 1 files prog_other
.SH NAME
@@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language
scans each input
.I file
for lines that match any of a set of patterns specified literally in
-.IR prog
+.I prog
or in one or more files
specified as
.B \-f
@@ -53,7 +52,7 @@ The file name
.B \-
means the standard input.
Any
-.IR file
+.I file
of the form
.I var=value
is treated as an assignment, not a filename,
@@ -70,12 +69,12 @@ any number of
options may be present.
The
.B \-F
-.IR fs
+.I fs
option defines the input field separator to be the regular expression
-.IR fs.
+.IR fs .
.PP
An input line is normally made up of fields separated by white space,
-or by regular expression
+or by the regular expression
.BR FS .
The fields are denoted
.BR $1 ,
@@ -87,7 +86,7 @@ If
.BR FS
is null, the input line is split into one field per character.
.PP
-A pattern-action statement has the form
+A pattern-action statement has the form:
.IP
.IB pattern " { " action " }
.PP
@@ -101,7 +100,7 @@ An action is a sequence of statements.
A statement can be one of the following:
.PP
.EX
-.ta \w'\f(CWdelete array[expression]'u
+.ta \w'\f(CWdelete array[expression]\fR'u
.RS
.nf
.ft CW
@@ -145,7 +144,7 @@ The operators
are also available in expressions.
Variables may be scalars, array elements
(denoted
-.IB x [ i ] )
+.IB x [ i ] \fR)
or fields.
Variables are initialized to the null string.
Array subscripts may be any string,
@@ -161,11 +160,11 @@ The
.B print
statement prints its arguments on the standard output
(or on a file if
-.BI > file
+.BI > " file
or
-.BI >> file
+.BI >> " file
is present or on a pipe if
-.BI | cmd
+.BI | " cmd
is present), separated by the current output field separator,
and terminated by the output record separator.
.I file
@@ -176,9 +175,10 @@ identical string values in different statements denote
the same open file.
The
.B printf
-statement formats its expression list according to the format
+statement formats its expression list according to the
+.I format
(see
-.IR printf (3)) .
+.IR printf (3)).
The built-in function
.BI close( expr )
closes the file or pipe
@@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe
.IR expr .
.PP
The mathematical functions
+.BR atan2 ,
+.BR cos ,
.BR exp ,
.BR log ,
-.BR sqrt ,
.BR sin ,
-.BR cos ,
and
-.BR atan2
+.B sqrt
are built in.
Other built-in functions:
.TF length
@@ -203,7 +203,8 @@ Other built-in functions:
.B length
the length of its argument
taken as a string,
-or of
+number of elements in an array for an array argument,
+or length of
.B $0
if no argument.
.TP
@@ -218,14 +219,18 @@ and returns the previous seed.
.B int
truncates to an integer value
.TP
-.BI substr( s , " m" , " n\fB)
+\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
the
.IR n -character
substring of
.I s
that begins at position
-.IR m
+.I m
counted from 1.
+If no
+.IR n ,
+use the rest of the string
+.I
.TP
.BI index( s , " t" )
the position in
@@ -246,14 +251,14 @@ and
.B RLENGTH
are set to the position and length of the matched string.
.TP
-.BI split( s , " a" , " fs\fB)
+\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR
splits the string
.I s
into array elements
-.IB a [1] ,
-.IB a [2] ,
+.IB a [1] \fR,
+.IB a [2] \fR,
\&...,
-.IB a [ n ] ,
+.IB a [ n ] \fR,
and returns
.IR n .
The separation is done with the regular expression
@@ -266,7 +271,7 @@ is not given.
An empty string as field separator splits the string
into one array element per character.
.TP
-.BI sub( r , " t" , " s\fB)
+\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
substitutes
.I t
for the first occurrence of the regular expression
@@ -279,7 +284,7 @@ is not given,
.B $0
is used.
.TP
-.B gsub
+\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
same as
.B sub
except that all occurrences of the regular expression
@@ -289,18 +294,28 @@ and
.B gsub
return the number of replacements.
.TP
-.BI sprintf( fmt , " expr" , " ...\fB )
+.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
according to the
.IR printf (3)
format
-.I fmt
+.IR fmt .
.TP
.BI system( cmd )
executes
.I cmd
-and returns its exit status
+and returns its exit status. This will be \-1 upon error,
+.IR cmd 's
+exit status upon a normal exit,
+256 +
+.I sig
+upon death-by-signal, where
+.I sig
+is the number of the murdering signal,
+or 512 +
+.I sig
+if there was a core dump.
.TP
.BI tolower( str )
returns a copy of
@@ -321,7 +336,7 @@ sets
.B $0
to the next input record from the current input file;
.B getline
-.BI < file
+.BI < " file
sets
.B $0
to the next record from
@@ -359,7 +374,7 @@ Isolated regular expressions
in a pattern apply to the entire line.
Regular expressions may also occur in
relational expressions, using the operators
-.BR ~
+.B ~
and
.BR !~ .
.BI / re /
@@ -383,8 +398,12 @@ A relational expression is one of the following:
.br
.BI ( expr , expr,... ") in " array-name
.PP
-where a relop is any of the six relational operators in C,
-and a matchop is either
+where a
+.I relop
+is any of the six relational operators in C,
+and a
+.I matchop
+is either
.B ~
(matches)
or
@@ -405,57 +424,68 @@ and after the last.
and
.B END
do not combine with other patterns.
+They may appear multiple times in a program and execute
+in the order they are read by
+.IR awk .
.PP
Variable names with special meanings:
.TF FILENAME
.TP
+.B ARGC
+argument count, assignable.
+.TP
+.B ARGV
+argument array, assignable;
+non-null members are taken as filenames.
+.TP
.B CONVFMT
conversion format used when converting numbers
(default
-.BR "%.6g" )
+.BR "%.6g" ).
+.TP
+.B ENVIRON
+array of environment variables; subscripts are names.
+.TP
+.B FILENAME
+the name of the current input file.
+.TP
+.B FNR
+ordinal number of the current record in the current file.
.TP
.B FS
regular expression used to separate fields; also settable
by option
-.BI \-F fs.
+.BI \-F fs\fR.
.TP
.BR NF
-number of fields in the current record
+number of fields in the current record.
.TP
.B NR
-ordinal number of the current record
-.TP
-.B FNR
-ordinal number of the current record in the current file
-.TP
-.B FILENAME
-the name of the current input file
+ordinal number of the current record.
.TP
-.B RS
-input record separator (default newline)
+.B OFMT
+output format for numbers (default
+.BR "%.6g" ).
.TP
.B OFS
-output field separator (default blank)
+output field separator (default space).
.TP
.B ORS
-output record separator (default newline)
+output record separator (default newline).
.TP
-.B OFMT
-output format for numbers (default
-.BR "%.6g" )
-.TP
-.B SUBSEP
-separates multiple subscripts (default 034)
+.B RLENGTH
+the length of a string matched by
+.BR match .
.TP
-.B ARGC
-argument count, assignable
+.B RS
+input record separator (default newline).
.TP
-.B ARGV
-argument array, assignable;
-non-null members are taken as filenames
+.B RSTART
+the start position of a string matched by
+.BR match .
.TP
-.B ENVIRON
-array of environment variables; subscripts are names.
+.B SUBSEP
+separates multiple subscripts (default 034).
.PD
.PP
Functions may be defined (at the position of a pattern-action statement) thus:
@@ -486,7 +516,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" }
.EE
.ns
.IP
-Same, with input fields separated by comma and/or blanks and tabs.
+Same, with input fields separated by comma and/or spaces and tabs.
.PP
.EX
.nf
@@ -512,13 +542,13 @@ BEGIN { # Simulate echo(1)
.fi
.EE
.SH SEE ALSO
+.IR grep (1),
.IR lex (1),
.IR sed (1)
.br
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
-.I
-The AWK Programming Language,
-Addison-Wesley, 1988. ISBN 0-201-07981-X
+.IR "The AWK Programming Language" ,
+Addison-Wesley, 1988. ISBN 0-201-07981-X.
.SH BUGS
There are no explicit conversions between numbers and strings.
To force an expression to be treated as a number add 0 to it;
@@ -527,3 +557,5 @@ to force it to be treated as a string concatenate
.br
The scope rules for variables in functions are a botch;
the syntax is worse.
+.br
+Only eight-bit character sets are handled correctly.
diff --git a/awk.h b/awk.h
index a36cdb151e75..ddf246687969 100644
--- a/awk.h
+++ b/awk.h
@@ -81,7 +81,8 @@ typedef struct Cell {
char *nval; /* name, for variables only */
char *sval; /* string value */
Awkfloat fval; /* value as number */
- int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
+ int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
+ char *fmt; /* CONVFMT/OFMT value used to convert from number */
struct Cell *cnext; /* ptr to next if chained */
} Cell;
@@ -96,9 +97,14 @@ extern Array *symtab;
extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */
+extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */
+extern Cell *ofsloc; /* OFS */
+extern Cell *orsloc; /* ORS */
+extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
+extern Cell *subseploc; /* SUBSEP */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */
@@ -109,6 +115,8 @@ extern Cell *rlengthloc; /* RLENGTH */
#define FCN 040 /* this is a function name */
#define FLD 0100 /* this is a field $1, $2, ... */
#define REC 0200 /* this is $0 */
+#define CONVC 0400 /* string was converted from number via CONVFMT */
+#define CONVO 01000 /* string was converted from number via OFMT */
/* function types */
diff --git a/awkgram.y b/awkgram.y
index 5b5c461b3eed..e4abeeddcb6a 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -86,7 +86,7 @@ Node *arglist = 0; /* list of args for current function */
%left CAT
%left '+' '-'
%left '*' '/' '%'
-%left NOT UMINUS
+%left NOT UMINUS UPLUS
%right POWER
%right DECR INCR
%left INDIRECT
@@ -357,7 +357,7 @@ term:
| term '%' term { $$ = op2(MOD, $1, $3); }
| term POWER term { $$ = op2(POWER, $1, $3); }
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
- | '+' term %prec UMINUS { $$ = $2; }
+ | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
| NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
| BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
| BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
diff --git a/b.c b/b.c
index 5ccb4b1e5d0f..37ea0a5bb2a7 100644
--- a/b.c
+++ b/b.c
@@ -27,6 +27,7 @@ THIS SOFTWARE.
#define DEBUG
#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -65,6 +66,11 @@ int rlxval;
static uschar *rlxstr;
static uschar *prestr; /* current position in current re */
static uschar *lastre; /* origin of last re */
+static uschar *lastatom; /* origin of last Atom */
+static uschar *starttok;
+static uschar *basestr; /* starts with original, replaced during
+ repetition processing */
+static uschar *firstbasestr;
static int setcnt;
static int poscnt;
@@ -82,11 +88,11 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
fa *pfa;
static int now = 1;
- if (setvec == NULL) { /* first time through any RE */
+ if (setvec == 0) { /* first time through any RE */
maxsetvec = MAXLIN;
setvec = (int *) malloc(maxsetvec * sizeof(int));
tmpset = (int *) malloc(maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space initializing makedfa");
}
@@ -124,6 +130,8 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
Node *p, *p1;
fa *f;
+ firstbasestr = (uschar *) s;
+ basestr = firstbasestr;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
/* put ALL STAR in front of reg. exp. */
@@ -137,7 +145,7 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
f->accept = poscnt-1; /* penter has computed number of positions in re */
cfoll(f, p1); /* set up follow sets */
freetr(p1);
- if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
+ if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
overflo("out of space in makedfa");
if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
overflo("out of space in makedfa");
@@ -145,6 +153,10 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
f->restr = (uschar *) tostring(s);
+ if (firstbasestr != basestr) {
+ if (basestr)
+ xfree(basestr);
+ }
return f;
}
@@ -157,7 +169,7 @@ int makeinit(fa *f, int anchor)
f->reset = 0;
k = *(f->re[0].lfollow);
xfree(f->posns[2]);
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of space in makeinit");
for (i=0; i <= k; i++) {
(f->posns[2])[i] = (f->re[0].lfollow)[i];
@@ -290,11 +302,11 @@ char *cclenter(const char *argp) /* add a character class */
int i, c, c2;
uschar *p = (uschar *) argp;
uschar *op, *bp;
- static uschar *buf = NULL;
+ static uschar *buf = 0;
static int bufsz = 100;
op = p;
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space for character class [%.10s...] 1", p);
bp = buf;
for (i = 0; (c = *p++) != 0; ) {
@@ -350,14 +362,14 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in cfoll()");
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+ if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
overflo("out of space building follow set");
f->re[info(v)].lfollow = p;
*p = setcnt;
@@ -391,7 +403,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in first()");
}
if (type(p) == EMPTYRE) {
@@ -531,7 +543,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of space in pmatch");
for (i = 0; i <= k; i++)
(f->posns[2])[i] = (f->posns[0])[i];
@@ -588,7 +600,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of state space");
for (i = 0; i <= k; i++)
(f->posns[2])[i] = (f->posns[0])[i];
@@ -628,9 +640,11 @@ Node *regexp(void) /* top-level parse of reg expr */
Node *primary(void)
{
Node *np;
+ int savelastatom;
switch (rtok) {
case CHAR:
+ lastatom = starttok;
np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
@@ -639,16 +653,19 @@ Node *primary(void)
return (unary(op2(ALL, NIL, NIL)));
case EMPTYRE:
rtok = relex();
- return (unary(op2(ALL, NIL, NIL)));
+ return (unary(op2(EMPTYRE, NIL, NIL)));
case DOT:
+ lastatom = starttok;
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
+ lastatom = starttok;
rtok = relex();
return (unary(np));
case NCCL:
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
+ lastatom = starttok;
rtok = relex();
return (unary(np));
case '^':
@@ -658,6 +675,8 @@ Node *primary(void)
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
+ lastatom = starttok;
+ savelastatom = starttok - basestr; /* Retain over recursion */
rtok = relex();
if (rtok == ')') { /* special pleading for () */
rtok = relex();
@@ -665,6 +684,7 @@ Node *primary(void)
}
np = regexp();
if (rtok == ')') {
+ lastatom = basestr + savelastatom; /* Restore */
rtok = relex();
return (unary(np));
}
@@ -679,8 +699,12 @@ Node *primary(void)
Node *concat(Node *np)
{
switch (rtok) {
- case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
+ case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
return (concat(op2(CAT, np, primary())));
+ case EMPTYRE:
+ rtok = relex();
+ return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
+ primary())));
}
return (np);
}
@@ -749,7 +773,7 @@ struct charclass {
{ "alnum", 5, isalnum },
{ "alpha", 5, isalpha },
#ifndef HAS_ISBLANK
- { "blank", 5, isspace }, /* was isblank */
+ { "blank", 5, xisblank },
#else
{ "blank", 5, isblank },
#endif
@@ -765,16 +789,132 @@ struct charclass {
{ NULL, 0, NULL },
};
+#define REPEAT_SIMPLE 0
+#define REPEAT_PLUS_APPENDED 1
+#define REPEAT_WITH_Q 2
+#define REPEAT_ZERO 3
+
+/*
+ * Rewrite the ERE in basestr: the bound at reptok (reptoklen bytes of "{...}")
+ * is replaced by extra copies of atom plus '+' or '?' as special_case dictates;
+ * the atom already in place is the first copy ({0} replaces it with "()").
+ * Installs the new string as basestr, points prestr at the replacement text,
+ * and returns 1 (2 for REPEAT_ZERO).
+ */
+static int
+replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+	       int atomlen, int firstnum, int secondnum, int special_case)
+{
+	int i, j;
+	uschar *buf = 0;
+	int ret = 1;
+	int init_q = (firstnum==0);		/* first added char will be ? */
+	int n_q_reps = secondnum-firstnum;	/* m>n, so reduce until {1,m-n} left  */
+	int prefix_length = reptok - basestr;	/* prefix includes first rep	*/
+	int suffix_length = strlen((const char *) reptok) - reptoklen;	/* string after rep specifier	*/
+	int size = prefix_length +  suffix_length;
+	if (firstnum > 1) {	/* add room for reps 2 through firstnum */
+		size += atomlen*(firstnum-1);
+	}
+	/* Adjust size of buffer for special cases */
+	if (special_case == REPEAT_PLUS_APPENDED) {
+		size++;		/* for the final + */
+	} else if (special_case == REPEAT_WITH_Q) {
+		size += init_q + (atomlen+1)* (n_q_reps-init_q);	/* {0,m}: in-place atom is one of the m */
+	} else if (special_case == REPEAT_ZERO) {
+		size += 2;	/* just a null ERE: () */
+	}
+	if ((buf = (uschar *) malloc(size+1)) == NULL)
+		FATAL("out of space in reg expr %.10s..", lastre);
+	memcpy(buf, basestr, prefix_length);	/* copy prefix	*/
+	j = prefix_length;
+	if (special_case == REPEAT_ZERO) {
+		j -= atomlen;			/* overwrite the atom itself */
+		buf[j++] = '(';
+		buf[j++] = ')';
+	}
+	for (i=1; i < firstnum; i++) {		/* copy x reps 	*/
+		memcpy(&buf[j], atom, atomlen);
+		j += atomlen;
+	}
+	if (special_case == REPEAT_PLUS_APPENDED) {
+		buf[j++] = '+';
+	} else if (special_case == REPEAT_WITH_Q) {
+		if (init_q) buf[j++] = '?';	/* makes the in-place atom optional */
+		for (i = init_q; i < n_q_reps; i++) {	/* copy x? reps; skip one if the in-place atom got a ? */
+			memcpy(&buf[j], atom, atomlen);
+			j += atomlen;
+			buf[j++] = '?';
+		}
+	}
+	memcpy(&buf[j], reptok+reptoklen, suffix_length);
+	buf[j+suffix_length] = '\0';	/* true end of the text in every case, incl. a shorter {0} result */
+	/* free old basestr */
+	if (firstbasestr != basestr) {
+		if (basestr)
+			xfree(basestr);
+	}
+	basestr = buf;
+	prestr  = buf + prefix_length;
+	if (special_case == REPEAT_ZERO) {
+		prestr  -= atomlen;
+		ret++;
+	}
+	return ret;
+}
+
+static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+		  int atomlen, int firstnum, int secondnum)
+{
+	int how;	/* which REPEAT_* rewrite replace_repeat() should do */
+
+	/*
+	   Expand a bound into plain ERE text: the atom just before it is
+	   repeated and ? or + appended as required, with the first copy
+	   of the atom staying where it is (a zero-repeat, {0}, is the
+	   one exception).  The rewriting itself is done by
+	   replace_repeat(); this routine only classifies the bound.
+	*/
+
+	if (secondnum < 0) {
+		/* {n,}: n-1 extra copies, then PLUS; n < 2 is handled by the caller */
+		if (firstnum < 2) {
+			FATAL("internal error");
+			return 0;
+		}
+		how = REPEAT_PLUS_APPENDED;
+	} else if (firstnum > secondnum) {
+		/* n > m must never reach this point */
+		FATAL("internal error");
+		return 0;
+	} else if (firstnum < secondnum) {
+		/* {n,m}: x{n,m} => xx...x followed by x?x?..x? */
+		how = REPEAT_WITH_Q;
+	} else if (firstnum == 0) {
+		/* {0} or {0,0}: the result may be SHORTER than the input */
+		how = REPEAT_ZERO;
+	} else {
+		/* {n} or {n,n} with n >= 1: simply n-1 extra copies */
+		how = REPEAT_SIMPLE;
+	}
+
+	return replace_repeat(reptok, reptoklen, atom, atomlen,
+			      firstnum, secondnum, how);
+}
int relex(void) /* lexical analyzer for reparse */
{
int c, n;
int cflag;
- static uschar *buf = NULL;
+ static uschar *buf = 0;
static int bufsz = 100;
uschar *bp;
struct charclass *cc;
int i;
+ int num, m, commafound, digitfound;
+ const uschar *startreptok;
+
+rescan:
+ starttok = prestr;
switch (c = *prestr++) {
case '|': return OR;
@@ -795,7 +935,7 @@ int relex(void) /* lexical analyzer for reparse */
rlxval = c;
return CHAR;
case '[':
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
bp = buf;
if (*prestr == '^') {
@@ -823,7 +963,15 @@ int relex(void) /* lexical analyzer for reparse */
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
- for (i = 0; i < NCHARS; i++) {
+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i <= UCHAR_MAX; i++) {
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
@@ -833,6 +981,40 @@ int relex(void) /* lexical analyzer for reparse */
}
} else
*bp++ = c;
+ } else if (c == '[' && *prestr == '.') {
+ char collate_char;
+ prestr++;
+ collate_char = *prestr++;
+ if (*prestr == '.' && prestr[1] == ']') {
+ prestr += 2;
+ /* Found it: map via locale TBD: for
+ now, simply return this char. This
+ is sufficient to pass conformance
+ test awk.ex 156
+ */
+ if (*prestr == ']') {
+ prestr++;
+ rlxval = collate_char;
+ return CHAR;
+ }
+ }
+ } else if (c == '[' && *prestr == '=') {
+ char equiv_char;
+ prestr++;
+ equiv_char = *prestr++;
+ if (*prestr == '=' && prestr[1] == ']') {
+ prestr += 2;
+ /* Found it: map via locale TBD: for now
+ simply return this char. This is
+ sufficient to pass conformance test
+ awk.ex 156
+ */
+ if (*prestr == ']') {
+ prestr++;
+ rlxval = equiv_char;
+ return CHAR;
+ }
+ }
} else if (c == '\0') {
FATAL("nonterminated character class %.20s", lastre);
} else if (bp == buf) { /* 1st char is special */
@@ -847,6 +1029,75 @@ int relex(void) /* lexical analyzer for reparse */
} else
*bp++ = c;
}
+ break;
+ case '{':
+ if (isdigit(*(prestr))) {
+ num = 0; /* Process as a repetition */
+ n = -1; m = -1;
+ commafound = 0;
+ digitfound = 0;
+ startreptok = prestr-1;
+ /* Remember start of previous atom here ? */
+ } else { /* just a { char, not a repetition */
+ rlxval = c;
+ return CHAR;
+ }
+ for (; ; ) {
+ if ((c = *prestr++) == '}') {
+ if (commafound) {
+ if (digitfound) { /* {n,m} */
+ m = num;
+ if (m<n)
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ if ((n==0) && (m==1)) {
+ return QUEST;
+ }
+ } else { /* {n,} */
+ if (n==0) return STAR;
+ if (n==1) return PLUS;
+ }
+ } else {
+ if (digitfound) { /* {n} same as {n,n} */
+ n = num;
+ m = num;
+ } else { /* {} */
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ }
+ }
+ if (repeat(starttok, prestr-starttok, lastatom,
+ startreptok - lastatom, n, m) > 0) {
+ if ((n==0) && (m==0)) {
+ return EMPTYRE;
+ }
+ /* must rescan input for next token */
+ goto rescan;
+ }
+ /* Failed to replace: eat up {...} characters
+ and treat like just PLUS */
+ return PLUS;
+ } else if (c == '\0') {
+ FATAL("nonterminated character class %.20s",
+ lastre);
+ } else if (isdigit(c)) {
+ num = 10 * num + c - '0';
+ digitfound = 1;
+ } else if (c == ',') {
+ if (commafound)
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ /* looking for {n,} or {n,m} */
+ commafound = 1;
+ n = num;
+ digitfound = 0; /* reset */
+ num = 0;
+ } else {
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ }
+ }
+ break;
}
}
@@ -860,7 +1111,7 @@ int cgoto(fa *f, int s, int c)
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in cgoto()");
}
for (i = 0; i <= f->accept; i++)
@@ -882,7 +1133,7 @@ int cgoto(fa *f, int s, int c)
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("cgoto overflow");
}
if (setvec[q[j]] == 0) {
@@ -925,7 +1176,7 @@ int cgoto(fa *f, int s, int c)
for (i = 0; i < NCHARS; i++)
f->gototab[f->curstat][i] = 0;
xfree(f->posns[f->curstat]);
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+ if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
overflo("out of space in cgoto");
f->posns[f->curstat] = p;
diff --git a/bugs-fixed/README b/bugs-fixed/README
new file mode 100644
index 000000000000..2f27c1039873
--- /dev/null
+++ b/bugs-fixed/README
@@ -0,0 +1,57 @@
+List of bugs fixed.
+
+1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+Fixed August 19, 2014. Revised fix August 2018.
+
+2. system-status: Instead of a floating-point division by 256, use
+the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016.
+
+3. space: Use provided xisblank() function instead of isspace() for
+matching [[:blank:]].
+
+4. a-format: Add POSIX standard %a and %A to supported formats. Check
+at runtime that this format is available.
+
+5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+bug. There are interactions with the old and new value of OFS as well.
+Most of the fix came from the NetBSD awk.
+
+6. string-conv: String conversions of scalars were sticky. Once a
+conversion to string happened, even with OFMT, that value was used until
+a new numeric value was assigned, even if OFMT differed from CONVFMT,
+and also if CONVFMT changed.
+
+7. unary-plus: Unary plus on a string constant returned the string.
+Instead, it should convert the value to numeric and give that value.
+
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+to with sprintf(), which meant that some conversions could write past the
+end.
+
+12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
+FS, RS, OFS, or ORS were set to a numeric value, then their string values
+wouldn't always be generated before being needed.
+
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
+calling execute(), in case SUBSEP itself has been changed.
+
+14. split-fs-from-array: If the third argument to split() comes from the
+array passed as the second argument, then split() would previously read
+from the freed memory and possibly produce incorrect results (depending
+on the system's malloc()/free() behaviour.)
+
+15. getline-numeric: The `getline xx < file' syntax did not check if
+values were numeric, in discordance with POSIX. Test case adapted from
+one posted by Ben Bacarisse <ben.usenet@bsb.me.uk> in comp.lang.awk,
+January 2019.
diff --git a/bugs-fixed/a-format.awk b/bugs-fixed/a-format.awk
new file mode 100644
index 000000000000..5b7929ee3eea
--- /dev/null
+++ b/bugs-fixed/a-format.awk
@@ -0,0 +1,3 @@
+BEGIN {
+ printf("%a\n", 42)
+}
diff --git a/bugs-fixed/a-format.bad b/bugs-fixed/a-format.bad
new file mode 100644
index 000000000000..1281825b1111
--- /dev/null
+++ b/bugs-fixed/a-format.bad
@@ -0,0 +1,3 @@
+nawk: weird printf conversion %a
+ source line number 2
+%a42
diff --git a/bugs-fixed/a-format.ok b/bugs-fixed/a-format.ok
new file mode 100644
index 000000000000..e421e2d01ba6
--- /dev/null
+++ b/bugs-fixed/a-format.ok
@@ -0,0 +1 @@
+0x1.5p+5
diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk
new file mode 100644
index 000000000000..ed19f35ca835
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+ print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad
new file mode 100644
index 000000000000..294725b28a97
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -0,0 +1,2 @@
+22345
+1 2 3 4 5
diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok
new file mode 100644
index 000000000000..447505259d02
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -0,0 +1,2 @@
+12345
+1 2 3 4 5
diff --git a/bugs-fixed/decr-NF.awk b/bugs-fixed/decr-NF.awk
new file mode 100644
index 000000000000..7474991d196e
--- /dev/null
+++ b/bugs-fixed/decr-NF.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ $0 = "a b c d e f"
+ print NF
+ OFS = ":"
+ NF--
+ print $0
+ print NF
+ NF++
+ print $0
+ print NF
+}
diff --git a/bugs-fixed/decr-NF.bad b/bugs-fixed/decr-NF.bad
new file mode 100644
index 000000000000..b634e065954c
--- /dev/null
+++ b/bugs-fixed/decr-NF.bad
@@ -0,0 +1,5 @@
+6
+a b c d e f
+5
+a b c d e f
+6
diff --git a/bugs-fixed/decr-NF.ok b/bugs-fixed/decr-NF.ok
new file mode 100644
index 000000000000..3359cf2312d1
--- /dev/null
+++ b/bugs-fixed/decr-NF.ok
@@ -0,0 +1,5 @@
+6
+a:b:c:d:e
+5
+a:b:c:d:e:
+6
diff --git a/bugs-fixed/fmt-overflow.awk b/bugs-fixed/fmt-overflow.awk
new file mode 100644
index 000000000000..bf5877e4abac
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.awk
@@ -0,0 +1 @@
+BEGIN { OFMT = "%.1000f"; print 1.25; }
diff --git a/bugs-fixed/fmt-overflow.ok b/bugs-fixed/fmt-overflow.ok
new file mode 100644
index 000000000000..5f7449e68073
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.ok
@@ -0,0 +1 @@
+1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk
new file mode 100644
index 000000000000..be10f5a46f0d
--- /dev/null
+++ b/bugs-fixed/fs-overflow.awk
@@ -0,0 +1,13 @@
+function foo() {
+ a = "";
+ for (i = 0; i < 10000; i++) {
+ a = a "c";
+ }
+ return a;
+}
+
+BEGIN {
+ FS = foo();
+ $0="foo";
+ print $1;
+}
diff --git a/bugs-fixed/getline-numeric.awk b/bugs-fixed/getline-numeric.awk
new file mode 100644
index 000000000000..5571a9589a3d
--- /dev/null
+++ b/bugs-fixed/getline-numeric.awk
@@ -0,0 +1,6 @@
+{
+ print $0, ($0 <= 50 ? "<=" : ">"), 50
+ getline dd < ARGV[1]
+ print dd, (dd <= 50 ? "<=" : ">"), 50
+ if (dd == $0) print "same"
+}
diff --git a/bugs-fixed/getline-numeric.bad b/bugs-fixed/getline-numeric.bad
new file mode 100644
index 000000000000..d911c774fa9a
--- /dev/null
+++ b/bugs-fixed/getline-numeric.bad
@@ -0,0 +1,3 @@
+120 > 50
+120 <= 50
+same
diff --git a/bugs-fixed/getline-numeric.in b/bugs-fixed/getline-numeric.in
new file mode 100644
index 000000000000..52bd8e43afb0
--- /dev/null
+++ b/bugs-fixed/getline-numeric.in
@@ -0,0 +1 @@
+120
diff --git a/bugs-fixed/getline-numeric.ok b/bugs-fixed/getline-numeric.ok
new file mode 100644
index 000000000000..f7efd3db506f
--- /dev/null
+++ b/bugs-fixed/getline-numeric.ok
@@ -0,0 +1,3 @@
+120 > 50
+120 > 50
+same
diff --git a/bugs-fixed/missing-precision.awk b/bugs-fixed/missing-precision.awk
new file mode 100644
index 000000000000..4e7a74b2c964
--- /dev/null
+++ b/bugs-fixed/missing-precision.awk
@@ -0,0 +1 @@
+BEGIN { printf("%*s"); }
diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok
new file mode 100644
index 000000000000..608b4fa48666
--- /dev/null
+++ b/bugs-fixed/missing-precision.ok
@@ -0,0 +1,2 @@
+./a.out: not enough args in printf(%*s)
+ source line number 1
diff --git a/bugs-fixed/negative-nf.awk b/bugs-fixed/negative-nf.awk
new file mode 100644
index 000000000000..6caeee4602b5
--- /dev/null
+++ b/bugs-fixed/negative-nf.awk
@@ -0,0 +1 @@
+BEGIN { NF = -5; }
diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok
new file mode 100644
index 000000000000..71c860468cc0
--- /dev/null
+++ b/bugs-fixed/negative-nf.ok
@@ -0,0 +1,2 @@
+./a.out: cannot set NF to a negative value
+ source line number 1
diff --git a/bugs-fixed/nf-self-assign.awk b/bugs-fixed/nf-self-assign.awk
new file mode 100644
index 000000000000..6ae29eef916d
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ $0="a b c";
+ OFS=",";
+ NF = NF;
+ print;
+}
diff --git a/bugs-fixed/nf-self-assign.bad b/bugs-fixed/nf-self-assign.bad
new file mode 100644
index 000000000000..3774da60e546
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.bad
@@ -0,0 +1 @@
+a b c
diff --git a/bugs-fixed/nf-self-assign.ok b/bugs-fixed/nf-self-assign.ok
new file mode 100644
index 000000000000..b2ffb02521e6
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.ok
@@ -0,0 +1 @@
+a,b,c
diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk
new file mode 100644
index 000000000000..01e438d4aa28
--- /dev/null
+++ b/bugs-fixed/numeric-fs.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ FS = 0; split("20202", a); print a[1];
+ FS = 1; $0="31313"; print $1;
+ FS = 2; "echo 42424" | getline; print $1;
+}
diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok
new file mode 100644
index 000000000000..dcf37cd5e262
--- /dev/null
+++ b/bugs-fixed/numeric-fs.ok
@@ -0,0 +1,3 @@
+2
+3
+4
diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk
new file mode 100644
index 000000000000..daa0f72aa6ff
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ $0 = "a b c";
+ OFS = 1;
+ ORS = 2;
+ NF = 2;
+ print;
+ print "d", "e";
+}
diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad
new file mode 100644
index 000000000000..95310f78a7f3
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.bad
@@ -0,0 +1,2 @@
+a b
+d e
diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok
new file mode 100644
index 000000000000..de6b2026e539
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.ok
@@ -0,0 +1 @@
+a1b2d1e2 \ No newline at end of file
diff --git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk
new file mode 100644
index 000000000000..cc7a0a0c08c2
--- /dev/null
+++ b/bugs-fixed/numeric-rs.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ RS = 1;
+ while ("echo a1b1c1d" | getline > 0) {
+ print $1;
+ }
+}
diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad
new file mode 100644
index 000000000000..2027bc6f27c9
--- /dev/null
+++ b/bugs-fixed/numeric-rs.bad
@@ -0,0 +1 @@
+a1b1c1d
diff --git a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok
new file mode 100644
index 000000000000..d68dd4031d2a
--- /dev/null
+++ b/bugs-fixed/numeric-rs.ok
@@ -0,0 +1,4 @@
+a
+b
+c
+d
diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk
new file mode 100644
index 000000000000..1252e4a99607
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ SUBSEP = 123.456;
+ a["hello", "world"] = "foo";
+ print a["hello" SUBSEP "world"];
+}
diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad
new file mode 100644
index 000000000000..8b137891791f
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.bad
@@ -0,0 +1 @@
+
diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok
new file mode 100644
index 000000000000..257cc5642cb1
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.ok
@@ -0,0 +1 @@
+foo
diff --git a/bugs-fixed/ofs-rebuild.awk b/bugs-fixed/ofs-rebuild.awk
new file mode 100644
index 000000000000..dd2700031524
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.awk
@@ -0,0 +1,17 @@
+# The bug here is that nawk should use the value of OFS that
+# was current when $0 became invalid to rebuild the record.
+
+BEGIN {
+ OFS = ":"
+ $0 = "a b c d e f g"
+ $3 = "3333"
+ # Conceptually, $0 should now be "a:b:3333:d:e:f:g"
+
+ # Change OFS after (conceptually) rebuilding the record
+ OFS = "<>"
+
+ # Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because
+ # it delays rebuilding $0 until it's needed, and then it uses
+ # the current value of OFS. Oops.
+ print
+}
diff --git a/bugs-fixed/ofs-rebuild.bad b/bugs-fixed/ofs-rebuild.bad
new file mode 100644
index 000000000000..7570811e2c16
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.bad
@@ -0,0 +1 @@
+a<>b<>3333<>d<>e<>f<>g
diff --git a/bugs-fixed/ofs-rebuild.ok b/bugs-fixed/ofs-rebuild.ok
new file mode 100644
index 000000000000..26892181f91b
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.ok
@@ -0,0 +1 @@
+a:b:3333:d:e:f:g
diff --git a/bugs-fixed/space.awk b/bugs-fixed/space.awk
new file mode 100644
index 000000000000..6aa87d2e6259
--- /dev/null
+++ b/bugs-fixed/space.awk
@@ -0,0 +1,22 @@
+BEGIN {
+ c[" "] = "\" \""
+ c["\a"] = "\\a"
+ c["\b"] = "\\b"
+ c["\f"] = "\\f"
+ c["\n"] = "\\n"
+ c["\r"] = "\\r"
+ c["\t"] = "\\t"
+ c["\v"] = "\\v"
+
+ sort = "LC_ALL=C sort"
+
+ for (i in c)
+ printf("%s %s [[:space:]]\n", c[i],
+ i ~ /[[:space:]]/ ? "~" : "!~") | sort
+
+ for (i in c)
+ printf("%s %s [[:blank:]]\n", c[i],
+ i ~ /[[:blank:]]/ ? "~" : "!~") | sort
+
+ close(sort)
+}
diff --git a/bugs-fixed/space.bad b/bugs-fixed/space.bad
new file mode 100644
index 000000000000..f92055fd0c26
--- /dev/null
+++ b/bugs-fixed/space.bad
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f ~ [[:blank:]]
+\f ~ [[:space:]]
+\n ~ [[:blank:]]
+\n ~ [[:space:]]
+\r ~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v ~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/space.ok b/bugs-fixed/space.ok
new file mode 100644
index 000000000000..4278c5c9df3b
--- /dev/null
+++ b/bugs-fixed/space.ok
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f !~ [[:blank:]]
+\f ~ [[:space:]]
+\n !~ [[:blank:]]
+\n ~ [[:space:]]
+\r !~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v !~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/split-fs-from-array.awk b/bugs-fixed/split-fs-from-array.awk
new file mode 100644
index 000000000000..fce1607c2a97
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ a[1] = "elephantie"
+ a[2] = "e"
+ print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
+}
diff --git a/bugs-fixed/split-fs-from-array.ok b/bugs-fixed/split-fs-from-array.ok
new file mode 100644
index 000000000000..9402b94f4fae
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.ok
@@ -0,0 +1 @@
+4 l phanti 2
diff --git a/bugs-fixed/string-conv.awk b/bugs-fixed/string-conv.awk
new file mode 100644
index 000000000000..a1f04aba354b
--- /dev/null
+++ b/bugs-fixed/string-conv.awk
@@ -0,0 +1,13 @@
+BEGIN {
+ OFMT = ">>%.6g<<"
+ a = 12.1234
+ print "a =", a
+ b = a ""
+ print "1 ->", b
+ CONVFMT = "%2.2f"
+ b = a ""
+ print "2 ->", b
+ CONVFMT = "%.12g"
+ b = a ""
+ print "3 ->", b
+}
diff --git a/bugs-fixed/string-conv.bad b/bugs-fixed/string-conv.bad
new file mode 100644
index 000000000000..2ab95e87d0a8
--- /dev/null
+++ b/bugs-fixed/string-conv.bad
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> >>12.1234<<
+2 -> >>12.1234<<
+3 -> >>12.1234<<
diff --git a/bugs-fixed/string-conv.ok b/bugs-fixed/string-conv.ok
new file mode 100644
index 000000000000..7c097113207a
--- /dev/null
+++ b/bugs-fixed/string-conv.ok
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> 12.1234
+2 -> 12.12
+3 -> 12.1234
diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk
new file mode 100644
index 000000000000..66c7c24db0e6
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.awk
@@ -0,0 +1,24 @@
+function foo(c, n) {
+ s = "";
+ for (i = 0; i < n; i++) {
+ s = s c;
+ }
+ return s;
+}
+
+BEGIN {
+ str1 = foo("a", 4500);
+ str2 = foo("b", 9000);
+
+ a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
+
+ for (k in a) {
+ print length(k);
+ }
+
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+ delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+}
diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok
new file mode 100644
index 000000000000..ddbbd78707ee
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.ok
@@ -0,0 +1,5 @@
+27001
+1
+1
+0
+0
diff --git a/bugs-fixed/system-status.awk b/bugs-fixed/system-status.awk
new file mode 100644
index 000000000000..8daf563e6f4f
--- /dev/null
+++ b/bugs-fixed/system-status.awk
@@ -0,0 +1,19 @@
+# Unmodified nawk prints the 16 bit exit status divided by 256, but
+# does so using floating point arithmetic, yielding strange results.
+#
+# The fix is to use the various macros defined for wait(2) and to
+# use the signal number + 256 for death by signal, or signal number + 512
+# for death by signal with core dump.
+
+BEGIN {
+ status = system("exit 42")
+ print "normal status", status
+
+ status = system("kill -HUP $$")
+ print "death by signal status", status
+
+ status = system("kill -ABRT $$")
+ print "death by signal with core dump status", status
+
+ system("rm -f core*")
+}
diff --git a/bugs-fixed/system-status.bad b/bugs-fixed/system-status.bad
new file mode 100644
index 000000000000..a1317dba54a8
--- /dev/null
+++ b/bugs-fixed/system-status.bad
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 0.00390625
+death by signal with core dump status 0.523438
diff --git a/bugs-fixed/system-status.ok b/bugs-fixed/system-status.ok
new file mode 100644
index 000000000000..737828f5ed7a
--- /dev/null
+++ b/bugs-fixed/system-status.ok
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 257
+death by signal with core dump status 518
diff --git a/bugs-fixed/unary-plus.awk b/bugs-fixed/unary-plus.awk
new file mode 100644
index 000000000000..ba6185b96704
--- /dev/null
+++ b/bugs-fixed/unary-plus.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ print +"q"
+ print +"43.12345678912345678"
+}
diff --git a/bugs-fixed/unary-plus.bad b/bugs-fixed/unary-plus.bad
new file mode 100644
index 000000000000..76f57d5d580c
--- /dev/null
+++ b/bugs-fixed/unary-plus.bad
@@ -0,0 +1,2 @@
+q
+43.12345678912345678
diff --git a/bugs-fixed/unary-plus.ok b/bugs-fixed/unary-plus.ok
new file mode 100644
index 000000000000..90f97afc5c44
--- /dev/null
+++ b/bugs-fixed/unary-plus.ok
@@ -0,0 +1,2 @@
+0
+43.1235
diff --git a/lex.c b/lex.c
index 0c65a9fe3292..ad8e878a247d 100644
--- a/lex.c
+++ b/lex.c
@@ -170,10 +170,10 @@ int reg = 0; /* 1 => return a REGEXPR now */
int yylex(void)
{
int c;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsize = 5; /* BUG: setting this small causes core dump! */
- if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
FATAL( "out of space in yylex" );
if (sc) {
sc = 0;
@@ -198,6 +198,7 @@ int yylex(void)
yylval.i = c;
switch (c) {
case '\n': /* {EOL} */
+ lineno++;
RET(NL);
case '\r': /* assume \n is coming */
case ' ': /* {WS}+ */
@@ -213,6 +214,7 @@ int yylex(void)
case '\\':
if (peek() == '\n') {
input();
+ lineno++;
} else if (peek() == '\r') {
input(); input(); /* \n */
lineno++;
@@ -358,10 +360,10 @@ int string(void)
{
int c, n;
char *s, *bp;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsz = 500;
- if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
@@ -370,10 +372,11 @@ int string(void)
case '\n':
case '\r':
case 0:
+ *bp = '\0';
SYNTAX( "non-terminated string %.10s...", buf );
- lineno++;
if (c == 0) /* hopeless */
FATAL( "giving up" );
+ lineno++;
break;
case '\\':
c = input();
@@ -504,17 +507,18 @@ void startreg(void) /* next call to yylex will return a regular expression */
int regexpr(void)
{
int c;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsz = 500;
char *bp;
- if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for rex expr");
bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
FATAL("out of space for reg expr %.10s...", buf);
if (c == '\n') {
+ *bp = '\0';
SYNTAX( "newline in regular expression %.10s...", buf );
unput('\n');
break;
@@ -539,7 +543,7 @@ char ebuf[300];
char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf;
-FILE *yyin = NULL;
+FILE *yyin = 0;
int input(void) /* get next lexical input character */
{
@@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */
lexprog++;
} else /* awk -f ... */
c = pgetc();
- if (c == '\n')
- lineno++;
- else if (c == EOF)
+ if (c == EOF)
c = 0;
if (ep >= ebuf + sizeof ebuf)
ep = ebuf;
- return *ep++ = c;
+ *ep = c;
+ if (c != 0) {
+ ep++;
+ }
+ return (c);
}
void unput(int c) /* put lexical character back on input */
{
- if (c == '\n')
- lineno--;
if (yysptr >= yysbuf + sizeof(yysbuf))
FATAL("pushed back too much: %.20s...", yysbuf);
*yysptr++ = c;
diff --git a/lib.c b/lib.c
index 5eeb53d4679d..a365245a0e1a 100644
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@ void recinit(unsigned int n)
{
if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
- || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+ || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
FATAL("out of space for $0 and fields");
*fldtab[0] = dollar0;
@@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
int sep, c;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
+ char *rs = getsval(rsloc);
- if (strlen(*FS) >= sizeof(inputFS))
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */
- if ((sep = **RS) == 0) {
+ if ((sep = *rs) == 0) {
sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
@@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
- if (**RS == sep || c == EOF)
+ if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
@@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
}
fr = fields;
i = 0; /* number of fields accumulated here */
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+ FATAL("field separator %.10s... is too long", *FS);
strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
@@ -356,6 +359,7 @@ void fldbld(void) /* create fields from current record */
}
}
setfval(nfloc, (Awkfloat) lastfld);
+ donerec = 1; /* restore */
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
@@ -387,6 +391,21 @@ void newfld(int n) /* add field n after end of existing lastfld */
setfval(nfloc, (Awkfloat) n);
}
+void setlastfld(int n) /* set lastfld to n: fatal if n < 0, grow fldtab if n > nfields, clean the fldtab cells between the old and new last field */
+{
+ if (n < 0)
+ FATAL("cannot set NF to a negative value");
+ if (n > nfields)
+ growfldtab(n);
+
+ if (lastfld < n)
+ cleanfld(lastfld+1, n);
+ else
+ cleanfld(n+1, lastfld);
+
+ lastfld = n;
+}
+
Cell *fieldadr(int n) /* get nth field */
{
if (n < 0)
@@ -465,6 +484,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
{
int i;
char *r, *p;
+ char *sep = getsval(ofsloc);
if (donerec == 1)
return;
@@ -476,9 +496,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
while ((*r = *p++) != 0)
r++;
if (i < *NF) {
- if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
+ if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record);
- for (p = *OFS; (*r = *p++) != 0; )
+ for (p = sep; (*r = *p++) != 0; )
r++;
}
}
@@ -618,6 +638,8 @@ void eprint(void) /* try to print context around error */
if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
return;
+ if (ebuf == ep)
+ return;
p = ep - 1;
if (p > ebuf && *p == '\n')
p--;
@@ -681,7 +703,7 @@ int isclvar(const char *s) /* is s of form var=something ? */
for ( ; *s; s++)
if (!(isalnum((uschar) *s) || *s == '_'))
break;
- return *s == '=' && s > os && *(s+1) != '=';
+ return *s == '=' && s > os;
}
/* strtod is supposed to be a proper test of what's a valid number */
diff --git a/main.c b/main.c
index 4b659974b056..98661fcd7829 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20121220";
+const char *version = "version 20190529";
#define DEBUG
#include <stdio.h>
@@ -54,6 +54,13 @@ int curpfile = 0; /* current filename */
int safe = 0; /* 1 => "safe" mode */
+/* Can this work with recursive calls? I don't think so.
+void segvcatch(int n)
+{
+ FATAL("segfault. Do you have an unbounded recursive call?", n);
+}
+*/
+
int main(int argc, char *argv[])
{
const char *fs = NULL;
@@ -68,6 +75,7 @@ int main(int argc, char *argv[])
exit(1);
}
signal(SIGFPE, fpecatch);
+ /*signal(SIGSEGV, segvcatch); experiment */
srand_seed = 1;
srand(srand_seed);
@@ -80,7 +88,7 @@ int main(int argc, char *argv[])
exit(0);
break;
}
- if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */
+ if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
argc--;
argv++;
break;
diff --git a/makefile b/makefile
index 88f992421561..3c0b62e1df9e 100644
--- a/makefile
+++ b/makefile
@@ -23,18 +23,21 @@
# ****************************************************************/
CFLAGS = -g
-CFLAGS = -O2
CFLAGS =
+CFLAGS = -O2
-CC = gcc -Wall -g -Wwrite-strings
-CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
-CC = gcc -g -Wall -pedantic
-CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+# compiler options
+#CC = gcc -Wall -g -Wwrite-strings
+#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
+HOSTCC = gcc -g -Wall -pedantic
+CC = $(HOSTCC) # change this if cross-compiling.
-YACC = bison -d -y
-YACC = yacc -d -S
+# yacc options. pick one; this varies a lot by system.
#YFLAGS = -d -S
- # -S uses sprintf in yacc parser instead of sprint
+YACC = bison -d -y
+#YACC = yacc -d
+# -S uses sprintf in yacc parser instead of sprint
OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
@@ -44,7 +47,7 @@ SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
lib.c run.c tran.c
-SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile \
+SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \
awk.1
a.out: ytab.o $(OFILES)
@@ -52,17 +55,23 @@ a.out: ytab.o $(OFILES)
$(OFILES): awk.h ytab.h proto.h
-ytab.o: awk.h proto.h awkgram.y
+#Clear dependency for parallel build: (make -j)
+#YACC generates y.tab.c and y.tab.h at the same time.
+#This needs to be a pattern rule; otherwise the multiple targets
+#are mapped onto multiple executions of yacc, which overwrite
+#each other's outputs.
+y%.c y%.h: awk.h proto.h awkgram.y
$(YACC) $(YFLAGS) awkgram.y
- mv y.tab.c ytab.c
- mv y.tab.h ytab.h
- $(CC) $(CFLAGS) -c ytab.c
+ mv y.$*.c y$*.c
+ mv y.$*.h y$*.h
+
+ytab.h: ytab.c
proctab.c: maketab
- ./maketab >proctab.c
+ ./maketab ytab.h >proctab.c
maketab: ytab.h maketab.c
- $(CC) $(CFLAGS) maketab.c -o maketab
+ $(HOSTCC) $(CFLAGS) maketab.c -o maketab
bundle:
@cp ytab.h ytabh.bak
@@ -79,8 +88,22 @@ tar:
@zip awk.zip $(SHIP)
ls -l awk.zip
+gitadd:
+ git add README LICENSE FIXES \
+ awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
+ lib.c run.c tran.c \
+ makefile awk.1 awktest.tar
+
+gitpush:
+ # only do this once:
+ # git remote add origin https://github.com/onetrueawk/awk.git
+ git push -u origin master
+
names:
@echo $(LISTING)
clean:
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
+
+cleaner:
+ rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab*
diff --git a/maketab.c b/maketab.c
index b59e81592fdb..dbe3d241fcc8 100644
--- a/maketab.c
+++ b/maketab.c
@@ -62,6 +62,7 @@ struct xx
{ DIVIDE, "arith", " / " },
{ MOD, "arith", " % " },
{ UMINUS, "arith", " -" },
+ { UPLUS, "arith", " +" },
{ POWER, "arith", " **" },
{ PREINCR, "incrdecr", "++" },
{ POSTINCR, "incrdecr", "++" },
@@ -124,8 +125,12 @@ int main(int argc, char *argv[])
for (i = SIZE; --i >= 0; )
names[i] = "";
- if ((fp = fopen("ytab.h", "r")) == NULL) {
- fprintf(stderr, "maketab can't open ytab.h!\n");
+ if (argc != 2) {
+ fprintf(stderr, "usage: maketab YTAB_H\n");
+ exit(1);
+ }
+ if ((fp = fopen(argv[1], "r")) == NULL) {
+ fprintf(stderr, "maketab can't open %s!\n", argv[1]);
exit(1);
}
printf("static char *printname[%d] = {\n", SIZE);
@@ -134,6 +139,8 @@ int main(int argc, char *argv[])
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
continue;
+ if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0)
+ continue;
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
continue;
@@ -149,7 +156,7 @@ int main(int argc, char *argv[])
table[p->token-FIRSTTOKEN] = p->name;
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++)
- if (table[i]==NULL)
+ if (table[i]==0)
printf("\tnullproc,\t/* %s */\n", names[i]);
else
printf("\t%s,\t/* %s */\n", table[i], names[i]);
diff --git a/parse.c b/parse.c
index 753a50def1fb..8304ded837ba 100644
--- a/parse.c
+++ b/parse.c
@@ -259,7 +259,7 @@ int isarg(const char *s) /* is s in argument list for current function? */
Node *p = arglist;
int n;
- for (n = 0; p != NULL; p = p->nnext, n++)
+ for (n = 0; p != 0; p = p->nnext, n++)
if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
return n;
return -1;
diff --git a/proctab.c b/proctab.c
new file mode 100644
index 000000000000..ff212c416c3a
--- /dev/null
+++ b/proctab.c
@@ -0,0 +1,209 @@
+#include <stdio.h>
+#include "awk.h"
+#include "ytab.h"
+
+static char *printname[94] = {
+ (char *) "FIRSTTOKEN", /* 258 */
+ (char *) "PROGRAM", /* 259 */
+ (char *) "PASTAT", /* 260 */
+ (char *) "PASTAT2", /* 261 */
+ (char *) "XBEGIN", /* 262 */
+ (char *) "XEND", /* 263 */
+ (char *) "NL", /* 264 */
+ (char *) "ARRAY", /* 265 */
+ (char *) "MATCH", /* 266 */
+ (char *) "NOTMATCH", /* 267 */
+ (char *) "MATCHOP", /* 268 */
+ (char *) "FINAL", /* 269 */
+ (char *) "DOT", /* 270 */
+ (char *) "ALL", /* 271 */
+ (char *) "CCL", /* 272 */
+ (char *) "NCCL", /* 273 */
+ (char *) "CHAR", /* 274 */
+ (char *) "OR", /* 275 */
+ (char *) "STAR", /* 276 */
+ (char *) "QUEST", /* 277 */
+ (char *) "PLUS", /* 278 */
+ (char *) "EMPTYRE", /* 279 */
+ (char *) "AND", /* 280 */
+ (char *) "BOR", /* 281 */
+ (char *) "APPEND", /* 282 */
+ (char *) "EQ", /* 283 */
+ (char *) "GE", /* 284 */
+ (char *) "GT", /* 285 */
+ (char *) "LE", /* 286 */
+ (char *) "LT", /* 287 */
+ (char *) "NE", /* 288 */
+ (char *) "IN", /* 289 */
+ (char *) "ARG", /* 290 */
+ (char *) "BLTIN", /* 291 */
+ (char *) "BREAK", /* 292 */
+ (char *) "CLOSE", /* 293 */
+ (char *) "CONTINUE", /* 294 */
+ (char *) "DELETE", /* 295 */
+ (char *) "DO", /* 296 */
+ (char *) "EXIT", /* 297 */
+ (char *) "FOR", /* 298 */
+ (char *) "FUNC", /* 299 */
+ (char *) "SUB", /* 300 */
+ (char *) "GSUB", /* 301 */
+ (char *) "IF", /* 302 */
+ (char *) "INDEX", /* 303 */
+ (char *) "LSUBSTR", /* 304 */
+ (char *) "MATCHFCN", /* 305 */
+ (char *) "NEXT", /* 306 */
+ (char *) "NEXTFILE", /* 307 */
+ (char *) "ADD", /* 308 */
+ (char *) "MINUS", /* 309 */
+ (char *) "MULT", /* 310 */
+ (char *) "DIVIDE", /* 311 */
+ (char *) "MOD", /* 312 */
+ (char *) "ASSIGN", /* 313 */
+ (char *) "ASGNOP", /* 314 */
+ (char *) "ADDEQ", /* 315 */
+ (char *) "SUBEQ", /* 316 */
+ (char *) "MULTEQ", /* 317 */
+ (char *) "DIVEQ", /* 318 */
+ (char *) "MODEQ", /* 319 */
+ (char *) "POWEQ", /* 320 */
+ (char *) "PRINT", /* 321 */
+ (char *) "PRINTF", /* 322 */
+ (char *) "SPRINTF", /* 323 */
+ (char *) "ELSE", /* 324 */
+ (char *) "INTEST", /* 325 */
+ (char *) "CONDEXPR", /* 326 */
+ (char *) "POSTINCR", /* 327 */
+ (char *) "PREINCR", /* 328 */
+ (char *) "POSTDECR", /* 329 */
+ (char *) "PREDECR", /* 330 */
+ (char *) "VAR", /* 331 */
+ (char *) "IVAR", /* 332 */
+ (char *) "VARNF", /* 333 */
+ (char *) "CALL", /* 334 */
+ (char *) "NUMBER", /* 335 */
+ (char *) "STRING", /* 336 */
+ (char *) "REGEXPR", /* 337 */
+ (char *) "GETLINE", /* 338 */
+ (char *) "RETURN", /* 339 */
+ (char *) "SPLIT", /* 340 */
+ (char *) "SUBSTR", /* 341 */
+ (char *) "WHILE", /* 342 */
+ (char *) "CAT", /* 343 */
+ (char *) "NOT", /* 344 */
+ (char *) "UMINUS", /* 345 */
+ (char *) "UPLUS", /* 346 */
+ (char *) "POWER", /* 347 */
+ (char *) "DECR", /* 348 */
+ (char *) "INCR", /* 349 */
+ (char *) "INDIRECT", /* 350 */
+ (char *) "LASTTOKEN", /* 351 */
+};
+
+
+Cell *(*proctab[94])(Node **, int) = {
+ nullproc, /* FIRSTTOKEN */
+ program, /* PROGRAM */
+ pastat, /* PASTAT */
+ dopa2, /* PASTAT2 */
+ nullproc, /* XBEGIN */
+ nullproc, /* XEND */
+ nullproc, /* NL */
+ array, /* ARRAY */
+ matchop, /* MATCH */
+ matchop, /* NOTMATCH */
+ nullproc, /* MATCHOP */
+ nullproc, /* FINAL */
+ nullproc, /* DOT */
+ nullproc, /* ALL */
+ nullproc, /* CCL */
+ nullproc, /* NCCL */
+ nullproc, /* CHAR */
+ nullproc, /* OR */
+ nullproc, /* STAR */
+ nullproc, /* QUEST */
+ nullproc, /* PLUS */
+ nullproc, /* EMPTYRE */
+ boolop, /* AND */
+ boolop, /* BOR */
+ nullproc, /* APPEND */
+ relop, /* EQ */
+ relop, /* GE */
+ relop, /* GT */
+ relop, /* LE */
+ relop, /* LT */
+ relop, /* NE */
+ instat, /* IN */
+ arg, /* ARG */
+ bltin, /* BLTIN */
+ jump, /* BREAK */
+ closefile, /* CLOSE */
+ jump, /* CONTINUE */
+ awkdelete, /* DELETE */
+ dostat, /* DO */
+ jump, /* EXIT */
+ forstat, /* FOR */
+ nullproc, /* FUNC */
+ sub, /* SUB */
+ gsub, /* GSUB */
+ ifstat, /* IF */
+ sindex, /* INDEX */
+ nullproc, /* LSUBSTR */
+ matchop, /* MATCHFCN */
+ jump, /* NEXT */
+ jump, /* NEXTFILE */
+ arith, /* ADD */
+ arith, /* MINUS */
+ arith, /* MULT */
+ arith, /* DIVIDE */
+ arith, /* MOD */
+ assign, /* ASSIGN */
+ nullproc, /* ASGNOP */
+ assign, /* ADDEQ */
+ assign, /* SUBEQ */
+ assign, /* MULTEQ */
+ assign, /* DIVEQ */
+ assign, /* MODEQ */
+ assign, /* POWEQ */
+ printstat, /* PRINT */
+ awkprintf, /* PRINTF */
+ awksprintf, /* SPRINTF */
+ nullproc, /* ELSE */
+ intest, /* INTEST */
+ condexpr, /* CONDEXPR */
+ incrdecr, /* POSTINCR */
+ incrdecr, /* PREINCR */
+ incrdecr, /* POSTDECR */
+ incrdecr, /* PREDECR */
+ nullproc, /* VAR */
+ nullproc, /* IVAR */
+ getnf, /* VARNF */
+ call, /* CALL */
+ nullproc, /* NUMBER */
+ nullproc, /* STRING */
+ nullproc, /* REGEXPR */
+ awkgetline, /* GETLINE */
+ jump, /* RETURN */
+ split, /* SPLIT */
+ substr, /* SUBSTR */
+ whilestat, /* WHILE */
+ cat, /* CAT */
+ boolop, /* NOT */
+ arith, /* UMINUS */
+ arith, /* UPLUS */
+ arith, /* POWER */
+ nullproc, /* DECR */
+ nullproc, /* INCR */
+ indirect, /* INDIRECT */
+ nullproc, /* LASTTOKEN */
+};
+
+char *tokname(int n) /* printable name of token n; values outside FIRSTTOKEN..LASTTOKEN are formatted as "token N" into a static buffer */
+{
+ static char buf[100];
+
+ if (n < FIRSTTOKEN || n > LASTTOKEN) {
+ sprintf(buf, "token %d", n);
+ return buf;
+ }
+ return printname[n-FIRSTTOKEN];
+}
diff --git a/proto.h b/proto.h
index 9a657ef73ec1..ad6f2e80a594 100644
--- a/proto.h
+++ b/proto.h
@@ -124,6 +124,7 @@ extern void setclvar(char *);
extern void fldbld(void);
extern void cleanfld(int, int);
extern void newfld(int);
+extern void setlastfld(int);
extern int refldbld(const char *, const char *);
extern void recbld(void);
extern Cell *fieldadr(int);
@@ -193,3 +194,5 @@ extern Cell *gsub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
+
+extern const char *flags2str(int flags);
diff --git a/run.c b/run.c
index 5342fe0d0f8b..2dfb3e6c383d 100644
--- a/run.c
+++ b/run.c
@@ -31,6 +31,8 @@ THIS SOFTWARE.
#include <string.h>
#include <stdlib.h>
#include <time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "awk.h"
#include "ytab.h"
@@ -71,23 +73,23 @@ extern Awkfloat srand_seed;
Node *winner = NULL; /* root of parse tree */
Cell *tmps; /* free temporary cells for execution */
-static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
+static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
Cell *True = &truecell;
-static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
+static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
Cell *False = &falsecell;
-static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
+static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
Cell *jbreak = &breakcell;
-static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
+static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
Cell *jcont = &contcell;
-static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
+static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
Cell *jnext = &nextcell;
-static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
+static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL };
Cell *jnextfile = &nextfilecell;
-static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
+static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
Cell *jexit = &exitcell;
-static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
+static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
Cell *jret = &retcell;
-static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
+static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
Node *curnode = NULL; /* the node being executed, for debugging */
@@ -112,7 +114,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
if (rminlen)
minlen += quantum - rminlen;
tbuf = (char *) realloc(*pbuf, minlen);
- dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
+ dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
if (tbuf == NULL) {
if (whatrtn)
FATAL("out of memory in %s", whatrtn);
@@ -221,7 +223,7 @@ struct Frame *fp = NULL; /* frame pointer. bottom level unused */
Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
{
- static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
+ static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
int i, ncall, ndef;
int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
Node *x;
@@ -323,14 +325,18 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
{
Cell *y;
+ /* copy is not constant or field */
+
y = gettemp();
+ y->tval = x->tval & ~(CON|FLD|REC);
y->csub = CCOPY; /* prevents freeing until call is over */
y->nval = x->nval; /* BUG? */
- if (isstr(x))
+ if (isstr(x) /* || x->ctype == OCELL */) {
y->sval = tostring(x->sval);
+ y->tval &= ~DONTFREE;
+ } else
+ y->tval |= DONTFREE;
y->fval = x->fval;
- y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */
- /* is DONTFREE right? */
return y;
}
@@ -419,6 +425,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
} else if (a[0] != NULL) { /* getline var <file */
x = execute(a[0]);
setsval(x, buf);
+ if (is_number(x->sval)) {
+ x->fval = atof(x->sval);
+ x->tval |= NUM;
+ }
tempfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
@@ -434,6 +444,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
n = getrec(&buf, &bufsize, 0);
x = execute(a[0]);
setsval(x, buf);
+ if (is_number(x->sval)) {
+ x->fval = atof(x->sval);
+ x->tval |= NUM;
+ }
tempfree(x);
}
}
@@ -456,7 +470,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
Node *np;
char *buf;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in array");
@@ -466,6 +480,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
FATAL("out of memory for %s[%s...]", x->nval, buf);
strcat(buf, s);
@@ -494,12 +509,12 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
Cell *x, *y;
Node *np;
char *s;
- int nsub = strlen(*SUBSEP);
+ int nsub;
x = execute(a[0]); /* Cell* for symbol table */
if (!isarr(x))
return True;
- if (a[1] == NULL) { /* delete the elements, not the table */
+ if (a[1] == 0) { /* delete the elements, not the table */
freesymtab(x);
x->tval &= ~STR;
x->tval |= ARR;
@@ -513,9 +528,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
- strcat(buf, s);
+ strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
tempfree(y);
@@ -534,7 +550,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
char *buf;
char *s;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {
@@ -552,6 +568,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
for (p = a[0]; p; p = p->nnext) {
x = execute(p); /* expr */
s = getsval(x);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s);
@@ -583,7 +600,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
}
x = execute(a[1]); /* a[1] = target text */
s = getsval(x);
- if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
+ if (a[0] == 0) /* a[0] == 0: a[2] is an already-compiled reg expr */
i = (*mf)((fa *) a[2], s);
else {
y = execute(a[2]); /* a[2] = regular expr */
@@ -699,7 +716,7 @@ Cell *gettemp(void) /* get a tempcell */
FATAL("out of space for temporaries");
for(i = 1; i < 100; i++)
tmps[i-1].cnext = &tmps[i];
- tmps[i-1].cnext = NULL;
+ tmps[i-1].cnext = 0;
}
x = tmps;
tmps = x->cnext;
@@ -734,18 +751,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
int k, m, n;
char *s;
int temp;
- Cell *x, *y, *z = NULL;
+ Cell *x, *y, *z = 0;
x = execute(a[0]);
y = execute(a[1]);
- if (a[2] != NULL)
+ if (a[2] != 0)
z = execute(a[2]);
s = getsval(x);
k = strlen(s) + 1;
if (k <= 1) {
tempfree(x);
tempfree(y);
- if (a[2] != NULL) {
+ if (a[2] != 0) {
tempfree(z);
}
x = gettemp();
@@ -758,7 +775,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
else if (m > k)
m = k;
tempfree(y);
- if (a[2] != NULL) {
+ if (a[2] != 0) {
n = (int) getfval(z);
tempfree(z);
} else
@@ -817,6 +834,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
char *buf = *pbuf;
int bufsize = *pbufsize;
+ static int first = 1;
+ static int have_a_format = 0;
+
+ if (first) {
+ char buf[100];
+
+ sprintf(buf, "%a", 42.0);
+ have_a_format = (strcmp(buf, "0x1.5p+5") == 0);
+ first = 0;
+ }
+
os = s;
p = buf;
if ((fmt = (char *) malloc(fmtsz)) == NULL)
@@ -842,7 +870,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
FATAL("format item %.30s... ran format() out of memory", os);
if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
break; /* the ansi panoply */
+ if (*s == '$') {
+ FATAL("'$' not permitted in awk formats");
+ }
if (*s == '*') {
+ if (a == NULL) {
+ FATAL("not enough args in printf(%s)", os);
+ }
x = execute(a);
a = a->nnext;
sprintf(t-1, "%d", fmtwd=(int) getfval(x));
@@ -857,8 +891,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
if (fmtwd < 0)
fmtwd = -fmtwd;
adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
-
switch (*s) {
+ case 'a': case 'A':
+ if (have_a_format)
+ flag = *s;
+ else
+ flag = 'f';
+ break;
case 'f': case 'e': case 'g': case 'E': case 'G':
flag = 'f';
break;
@@ -901,6 +940,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
p += strlen(p);
sprintf(p, "%s", t);
break;
+ case 'a':
+ case 'A':
case 'f': sprintf(p, fmt, getfval(x)); break;
case 'd': sprintf(p, fmt, (long) getfval(x)); break;
case 'u': sprintf(p, fmt, (int) getfval(x)); break;
@@ -1003,7 +1044,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
x = execute(a[0]);
i = getfval(x);
tempfree(x);
- if (n != UMINUS) {
+ if (n != UMINUS && n != UPLUS) {
y = execute(a[1]);
j = getfval(y);
tempfree(y);
@@ -1033,6 +1074,8 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
case UMINUS:
i = -i;
break;
+ case UPLUS: /* handled by getfval(), above */
+ break;
case POWER:
if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
i = ipow(i, (int) j);
@@ -1088,8 +1131,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
y = execute(a[1]);
x = execute(a[0]);
if (n == ASSIGN) { /* ordinary assignment */
- if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
- ; /* leave alone unless it's a field */
+ if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
+ ; /* self-assignment: leave alone unless it's a field or NF */
else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
setsval(x, getsval(y));
x->fval = getfval(y);
@@ -1146,25 +1189,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
{
Cell *x, *y, *z;
int n1, n2;
- char *s;
+ char *s = NULL;
+ int ssz = 0;
x = execute(a[0]);
+ n1 = strlen(getsval(x));
+ adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+ (void) strncpy(s, x->sval, ssz);
+
y = execute(a[1]);
- getsval(x);
- getsval(y);
- n1 = strlen(x->sval);
- n2 = strlen(y->sval);
- s = (char *) malloc(n1 + n2 + 1);
- if (s == NULL)
- FATAL("out of space concatenating %.15s... and %.15s...",
- x->sval, y->sval);
- strcpy(s, x->sval);
- strcpy(s+n1, y->sval);
+ n2 = strlen(getsval(y));
+ adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+ (void) strncpy(s + n1, y->sval, ssz - n1);
+
tempfree(x);
tempfree(y);
+
z = gettemp();
z->sval = s;
z->tval = STR;
+
return(z);
}
@@ -1172,7 +1216,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
{
Cell *x;
- if (a[0] == NULL)
+ if (a[0] == 0)
x = execute(a[1]);
else {
x = execute(a[0]);
@@ -1209,20 +1253,22 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
{
- Cell *x = NULL, *y, *ap;
+ Cell *x = 0, *y, *ap;
char *s, *origs;
+ char *fs, *origfs = NULL;
int sep;
- char *t, temp, num[50], *fs = NULL;
+ char *t, temp, num[50];
int n, tempstat, arg3type;
y = execute(a[0]); /* source string */
origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]);
- if (a[2] == NULL) /* fs string */
- fs = *FS;
+ if (a[2] == 0) /* fs string */
+ fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]);
- fs = getsval(x);
+ origfs = fs = strdup(getsval(x));
+ tempfree(x);
} else if (arg3type == REGEXPR)
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
else
@@ -1337,9 +1383,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
tempfree(ap);
tempfree(y);
free(origs);
- if (a[2] != NULL && arg3type == STRING) {
- tempfree(x);
- }
+ free(origfs);
x = gettemp();
x->tval = NUM;
x->fval = n;
@@ -1369,7 +1413,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
if (istrue(x)) {
tempfree(x);
x = execute(a[1]);
- } else if (a[2] != NULL) {
+ } else if (a[2] != 0) {
tempfree(x);
x = execute(a[2]);
}
@@ -1421,7 +1465,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
x = execute(a[0]);
tempfree(x);
for (;;) {
- if (a[1]!=NULL) {
+ if (a[1]!=0) {
x = execute(a[1]);
if (!istrue(x)) return(x);
else tempfree(x);
@@ -1479,6 +1523,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
Node *nextarg;
FILE *fp;
void flush_all(void);
+ int status = 0;
t = ptoi(a[0]);
x = execute(a[1]);
@@ -1503,7 +1548,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
case FCOS:
u = cos(getfval(x)); break;
case FATAN:
- if (nextarg == NULL) {
+ if (nextarg == 0) {
WARNING("atan2 requires two arguments; returning 1.0");
u = 1.0;
} else {
@@ -1515,7 +1560,20 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
- u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
+ status = system(getsval(x));
+ u = status;
+ if (status != -1) {
+ if (WIFEXITED(status)) {
+ u = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ u = WTERMSIG(status) + 256;
+#ifdef WCOREDUMP
+ if (WCOREDUMP(status))
+ u += 256;
+#endif
+ } else /* something else?!? */
+ u = 0;
+ }
break;
case FRAND:
/* in principle, rand() returns something in 0..RAND_MAX */
@@ -1564,7 +1622,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
tempfree(x);
x = gettemp();
setfval(x, u);
- if (nextarg != NULL) {
+ if (nextarg != 0) {
WARNING("warning: function has too many arguments");
for ( ; nextarg; nextarg = nextarg->nnext)
execute(nextarg);
@@ -1578,7 +1636,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */
Cell *y;
FILE *fp;
- if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
+ if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
fp = stdout;
else
fp = redirect(ptoi(a[1]), a[2]);
@@ -1587,11 +1645,11 @@ Cell *printstat(Node **a, int n) /* print a[0] */
fputs(getpssval(y), fp);
tempfree(y);
if (x->nnext == NULL)
- fputs(*ORS, fp);
+ fputs(getsval(orsloc), fp);
else
- fputs(*OFS, fp);
+ fputs(getsval(ofsloc), fp);
}
- if (a[1] != NULL)
+ if (a[1] != 0)
fflush(fp);
if (ferror(fp))
FATAL("write error on %s", filename(fp));
@@ -1600,8 +1658,6 @@ Cell *printstat(Node **a, int n) /* print a[0] */
Cell *nullproc(Node **a, int n)
{
- n = n;
- a = a;
return 0;
}
@@ -1650,7 +1706,7 @@ FILE *openfile(int a, const char *us)
{
const char *s = us;
int i, m;
- FILE *fp = NULL;
+ FILE *fp = 0;
if (*s == '\0')
FATAL("null file name in print or getline");
@@ -1665,7 +1721,7 @@ FILE *openfile(int a, const char *us)
return NULL;
for (i=0; i < nfiles; i++)
- if (files[i].fp == NULL)
+ if (files[i].fp == 0)
break;
if (i >= nfiles) {
struct files *nf;
@@ -1715,7 +1771,6 @@ Cell *closefile(Node **a, int n)
Cell *x;
int i, stat;
- n = n;
x = execute(a[0]);
getsval(x);
stat = -1;
@@ -1782,7 +1837,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
@@ -1822,7 +1877,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
if (pb > buf + bufsz)
FATAL("sub result2 %.30s too big; can't happen", buf);
setsval(x, buf); /* BUG: should be able to avoid copy */
- result = True;
+ result = True;;
}
tempfree(x);
tempfree(y);
@@ -1845,7 +1900,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
num = 0;
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
diff --git a/tran.c b/tran.c
index e364ebd81928..d1dfe2b2f176 100644
--- a/tran.c
+++ b/tran.c
@@ -55,10 +55,14 @@ Cell *fsloc; /* FS */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
+Cell *ofsloc; /* OFS */
+Cell *orsloc; /* ORS */
+Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
+Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */
@@ -67,6 +71,18 @@ Cell *literal0;
extern Cell **fldtab;
+static void
+setfree(Cell *vp)
+{
+ if (&vp->sval == FS || &vp->sval == RS ||
+ &vp->sval == OFS || &vp->sval == ORS ||
+ &vp->sval == OFMT || &vp->sval == CONVFMT ||
+ &vp->sval == FILENAME || &vp->sval == SUBSEP)
+ vp->tval |= DONTFREE;
+ else
+ vp->tval &= ~DONTFREE;
+}
+
void syminit(void) /* initialize symbol table with builtin vars */
{
literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
@@ -76,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
FS = &fsloc->sval;
- RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
- OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
- ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
+ rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+ RS = &rsloc->sval;
+ ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+ OFS = &ofsloc->sval;
+ orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+ ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@@ -88,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */
NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval;
- SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
+ subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+ SUBSEP = &subseploc->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@@ -174,7 +194,7 @@ void freesymtab(Cell *ap) /* free a symbol table */
free(cp);
tp->nelem--;
}
- tp->tab[i] = NULL;
+ tp->tab[i] = 0;
}
if (tp->nelem != 0)
WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
@@ -282,6 +302,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
{
int fldno;
+ f += 0.0; /* normalise negative zero to positive zero */
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
@@ -290,13 +311,21 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %g\n", fldno, f) );
+ } else if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ setlastfld(f);
+ dprintf( ("setting NF to %g\n", f) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
if (freeable(vp))
xfree(vp->sval); /* free any previous string */
- vp->tval &= ~STR; /* mark string invalid */
+ vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
+ vp->fmt = NULL;
vp->tval |= NUM; /* mark number ok */
if (f == -0) /* who would have thought this possible? */
f = 0;
@@ -318,6 +347,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
{
char *t;
int fldno;
+ Awkfloat f;
dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
@@ -328,20 +358,32 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
+ dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
- t = tostring(s); /* in case it's self-assign */
+ t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
if (freeable(vp))
xfree(vp->sval);
- vp->tval &= ~NUM;
+ vp->tval &= ~(NUM|CONVC|CONVO);
vp->tval |= STR;
- vp->tval &= ~DONTFREE;
+ vp->fmt = NULL;
+ setfree(vp);
dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
- (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
- return(vp->sval = t);
+ (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
+ vp->sval = t;
+ if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ f = getfval(vp);
+ setlastfld(f);
+ dprintf( ("setting NF to %g\n", f) );
+ }
+
+ return(vp->sval);
}
Awkfloat getfval(Cell *vp) /* get float val of a Cell */
@@ -364,7 +406,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
{
- char s[100]; /* BUG: unchecked */
+ char s[256];
double dtemp;
if ((vp->tval & (NUM | STR)) == 0)
@@ -373,19 +415,80 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
fldbld();
else if (isrec(vp) && donerec == 0)
recbld();
+
+ /*
+ * ADR: This is complicated and more fragile than is desirable.
+ * Retrieving a string value for a number associates the string
+ * value with the scalar. Previously, the string value was
+ * sticky, meaning if converted via OFMT that became the value
+ * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
+ * changed after a string value was retrieved, the original value
+ * was maintained and used. Also not per POSIX.
+ *
+ * We work around this design by adding two additional flags,
+ * CONVC and CONVO, indicating how the string value was
+ * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
+ * of the pointer to the xFMT format string used for the
+ * conversion. This pointer is only read, **never** dereferenced.
+ * The next time we do a conversion, if it's coming from the same
+ * xFMT as last time, and the pointer value is different, we
+ * know that the xFMT format string changed, and we need to
+ * redo the conversion. If it's the same, we don't have to.
+ *
+ * There are also several cases where we don't do a conversion,
+ * such as for a field (see the checks below).
+ */
+
+ /* Don't duplicate the code for actually updating the value */
+#define update_str_val(vp) \
+ { \
+ if (freeable(vp)) \
+ xfree(vp->sval); \
+ if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
+ snprintf(s, sizeof (s), "%.30g", vp->fval); \
+ else \
+ snprintf(s, sizeof (s), *fmt, vp->fval); \
+ vp->sval = tostring(s); \
+ vp->tval &= ~DONTFREE; \
+ vp->tval |= STR; \
+ }
+
if (isstr(vp) == 0) {
- if (freeable(vp))
- xfree(vp->sval);
- if (modf(vp->fval, &dtemp) == 0) /* it's integral */
- sprintf(s, "%.30g", vp->fval);
- else
- sprintf(s, *fmt, vp->fval);
- vp->sval = tostring(s);
- vp->tval &= ~DONTFREE;
- vp->tval |= STR;
+ update_str_val(vp);
+ if (fmt == OFMT) {
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
+ } else {
+ /* CONVFMT */
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ }
+ vp->fmt = *fmt;
+ } else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
+ goto done;
+ } else if (isstr(vp)) {
+ if (fmt == OFMT) {
+ if ((vp->tval & CONVC) != 0
+ || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
+ update_str_val(vp);
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
+ vp->fmt = *fmt;
+ }
+ } else {
+ /* CONVFMT */
+ if ((vp->tval & CONVO) != 0
+ || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
+ update_str_val(vp);
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ vp->fmt = *fmt;
+ }
+ }
}
+done:
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
- (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
+ (void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
return(vp->sval);
}
@@ -457,3 +560,37 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */
*bp++ = 0;
return (char *) buf;
}
+
+const char *flags2str(int flags)
+{
+ static const struct ftab {
+ const char *name;
+ int value;
+ } flagtab[] = {
+ { "NUM", NUM },
+ { "STR", STR },
+ { "DONTFREE", DONTFREE },
+ { "CON", CON },
+ { "ARR", ARR },
+ { "FCN", FCN },
+ { "FLD", FLD },
+ { "REC", REC },
+ { "CONVC", CONVC },
+ { "CONVO", CONVO },
+ { NULL, 0 }
+ };
+ static char buf[100];
+ int i;
+ char *cp = buf;
+
+ for (i = 0; flagtab[i].name != NULL; i++) {
+ if ((flags & flagtab[i].value) != 0) {
+ if (cp > buf)
+ *cp++ = '|';
+ strcpy(cp, flagtab[i].name);
+ cp += strlen(cp);
+ }
+ }
+
+ return buf;
+}