aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWarner Losh <imp@FreeBSD.org>2021-07-07 23:03:17 +0000
committerWarner Losh <imp@FreeBSD.org>2021-07-07 23:09:41 +0000
commit746b7396bb3e85208573892a0f314e0b4e7dacf4 (patch)
treec14cfbfbe60fa840441df8e856727eba63075d4e
parent03ee4d05f1d963d60451e04ce505e4da116300db (diff)
downloadsrc-vendor/one-true-awk.tar.gz
src-vendor/one-true-awk.zip
one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugsvendor/one-true-awk/1e4bc42c53a1vendor/one-true-awk
Import the latest bsd-features branch of the one-true-awk upstream: o Move to bison for $YACC o Set close-on-exec flag for file and pipe redirects that aren't std* o lots of little fixes to modernize code base o free sval member before setting it o fix a bug where a{0,3} could match aaaa o pull in systime and strftime from NetBSD awk o pull in fixes from {Net,Free,Open}BSD o add BSD extensions and, or, xor, compl, lshift, rshift Sponsored by: Netflix
-rw-r--r--ChangeLog108
-rw-r--r--FIXES261
-rwxr-xr-xREGRESS4
-rw-r--r--awk.1151
-rw-r--r--awk.h61
-rw-r--r--awkgram.y48
-rw-r--r--b.c469
-rw-r--r--bugs-fixed/missing-precision.ok2
-rw-r--r--bugs-fixed/negative-nf.ok2
-rw-r--r--lex.c100
-rw-r--r--lib.c328
-rw-r--r--main.c165
-rw-r--r--makefile74
-rw-r--r--maketab.c66
-rw-r--r--parse.c31
-rw-r--r--proctab.c202
-rw-r--r--proto.h41
-rw-r--r--run.c982
-rw-r--r--tran.c166
19 files changed, 2272 insertions, 989 deletions
diff --git a/ChangeLog b/ChangeLog
index fd03b2bbca0b..dea4ed7e3187 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,111 @@
+2020-07-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ By fiat, we use bison for $(YACC). Trying to accommodate
+ different versions didn't work.
+
+ * makefile: Significant cleanup. Replace all ytab* references
+ with awkgram.tab.* and simplify definition of YACC.
+ * .gitignore: Remove ytab* references.
+ * b.c, lex.c, maketab.c, parse.c, run.c: Replace include of ytab.h
+ with awkgram.tab.h.
+ * lib.c, main.c, tran.c: Remove include of ytab.h, wasn't needed.
+
+2020-01-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * run.c (openfile): Set the close-on-exec flag for file
+ and pipe redirections that aren't stdin/stdout/stderr.
+
+2020-01-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ Minor fixes.
+ * b.c (replace_repeat): Turn init_q back into an int.
+ * lex.c (string): Use \a instead of \007.
+ * tran.c (catstr): Use snprintf instead of sprintf.
+
+2020-01-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * tran.c (syminit, arginit, envinit): Free sval member before
+ setting it. Thanks to valgrind.
+ * b.c: Small formatting cleanups in several routines.
+
+2019-12-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * b.c (replace_repeat): Fix a bug whereby a{0,3} could match
+ four a's. Thanks to Anonymous AWK fan <awkfan77@mailfence.com>
+ for the report. Also, minor code formatting cleanups.
+ * testdir/T.int-expr: New file.
+
+2019-12-11 Arnold D. Robbins <arnold@skeeve.com>
+
+ * README: Renamed to ...
+ * README.md: ... this. Cleaned up some as well,
+ including moving to Markdown.
+
+2019-11-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * test/T.chem: Use $oldawk instead of hardwiring 'awk'.
+ * test/T.lilly: Remove gawk warnings from output, improves
+ portability.
+
+2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ Pull in systime() and strftime() from the NetBSD awk.
+
+ * awk.1: Document the functions.
+ * run.c (bltin): Implement the functions.
+ * awk.h: Add defines for systime and strftime.
+ * lex.c: Add support for systime and strftime.
+
+2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ Integrate features from different *BSD versions of awk.
+ Gensub support from NetBSD. Bitwise functions from OpenBSD.
+
+ * awk.h: Add defines for and, or, xor, compl, lshift and rshift.
+ * awkgram.y: Add support for gensub.
+ * maketab.c: Ditto.
+ * lex.c: Add support for gensub and bitwise functions.
+ * parse.c (node5, op5): New functions.
+ * proto.h (node5, op5): New declarations.
+ * run.c (bltin): Implement the bitwise functions.
+ (gensub): New function.
+ * awk.1: Document additional functions.
+
+2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * b.c (fnematch): Change type of pbuf from unsigned char to char.
+ * proto.h (fnematch): Ditto.
+
+2019-10-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * lib.c (readrec): Allow RS a regular expression. Imported
+ the code from the NetBSD awk.
+ * b.c (fnematch): New function for implementing the feature.
+ * awk.1: Updated.
+ * main.c (version): Updated.
+
+2019-06-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * makefile: Revise to take into account there is no more awktest.tar,
+ add targets 'check' and 'test', and also 'testclean' to clean up
+ after test run. Have 'clean' and 'cleaner' depend upon 'testclean'.
+
+2019-06-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * testdir: Extracted from awktest.tar and added to Git.
+ * awktest.tar: Removed.
+
+2019-06-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.1: Fix a typo, minor edits.
+
+2019-06-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * b.c (relex): Count parentheses and treat unmatched right paren
+ as a literal character.
+ * awktest.tar (testdir/T.re): Added a test case.
+ * main.c (version): Updated.
+
2019-05-29 Arnold D. Robbins <arnold@skeeve.com>
* lib.c (isclvar): Remove check for additional '=' after
diff --git a/FIXES b/FIXES
index 183eaedee47d..516458eee0c1 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,229 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+February 15, 2021:
+ Small fix so that awk will compile again with g++. Thanks to
+ Arnold Robbins.
+
+January 06, 2021:
+ Fix a decision bug with trailing stuff in lib.c:is_valid_number
+ after recent changes. Thanks to Ozan Yigit.
+
+December 18, 2020:
+ Fix problems converting inf and NaN values in lib.c:is_valid_number.
+ Enhance number to string conversion to do the right thing for
+ NaN and inf values. Things are now pretty much the same as in
+ gawk. (Found a gawk bug while we're at it.) Added a torture
+ test for these values. Thanks to Arnold Robbins. Allows closing
+ of PR #101.
+
+December 15, 2020:
+ Merge PR #99, which gets the right header for strcasecmp.
+ Thanks to GitHub user michaelforney.
+
+December 8, 2020:
+ Merge PR #98: Disallow hex data. Allow only +nan, -nan,
+ +inf, -inf (case independent) to give NaN and infinity values.
+ Improve things so that string to double conversion is only
+ done once, yielding something of a speedup. This obviates
+ PR #95. Thanks to Arnold Robbins.
+
+December 3, 2020:
+ Fix to argument parsing to avoid printing spurious newlines.
+ Thanks to Todd Miller. Merges PR #97.
+
+October 13, 2020:
+ Add casts before all the calls to malloc/calloc/realloc in order
+ to get it to compile with g++. Thanks to Arnold Robbins.
+
+August 16, 2020:
+ Additional fixes for DJGPP. Thanks to Eli Zaretskii for
+ the testing.
+
+August 7, 2020:
+ Merge PR #93, which adds casts to (void*) for debug prints
+ using the %p format specifier. Thanks to GitHub user YongHaoWu
+ ("Chris") for the fixes.
+
+August 4, 2020:
+ In run.c, use non-restartable multibyte routines to attain
+ portability to DJGPP. Should fix Issue 92. Thanks to Albert Wik
+ for the report and to Todd Miller for the suggested fix.
+
+July 30, 2020:
+ Merge PRs 88-91 which fix small bugs. Thanks to Todd Miller and
+ Tim van der Molen for the fixes.
+
+ In order to make life easier, we move exclusively to bison
+ as the parser generator.
+
+July 2, 2020:
+ Merge PRs 85 and 86 which fix regressions. Thanks to
+ Tim van der Molen for the fixes.
+
+June 25, 2020:
+ Merge PRs 82 and 84. The latter fixes issue #83. Thanks to
+ Todd Miller and awkfan77.
+
+June 12, 2020:
+ Clear errno before calling errcheck to avoid any spurious errors
+ left over from previous calls that may have set it. Thanks to
+ Todd Miller for the fix, from PR #80.
+
+ Fix Issue #78 by allowing \r to follow floating point numbers in
+ lib.c:is_number. Thanks to GitHub user ajcarr for the report
+ and to Arnold Robbins for the fix.
+
+June 5, 2020:
+ In fldbld(), make sure that inputFS is set before trying to
+ use it. Thanks to Steffen Nurpmeso <steffen@sdaoden.eu>
+ for the report.
+
+May 5, 2020:
+ Fix checks for compilers that can handle noreturn. Thanks to
+ GitHub user enh-google for pointing it out. Closes Issue #79.
+
+April 16, 2020:
+ Handle old compilers that don't support C11 (for noreturn).
+ Thanks to Arnold Robbins.
+
+April 5, 2020:
+ Use <stdnoreturn.h> and noreturn instead of GCC attributes.
+ Thanks to GitHub user awkfan77. Closes PR #77.
+
+February 28, 2020:
+ More cleanups from Christos Zoulas: notably backslash continuation
+ inside strings removes the newline and a fix for RS = "^a".
+ Fix for address sanitizer-found problem. Thanks to GitHub user
+ enh-google.
+
+February 19, 2020:
+ More small cleanups from Christos Zoulas.
+
+February 18, 2020:
+ Additional cleanups from Christos Zoulas. It's no longer necessary
+ to use the -y flag to bison.
+
+February 6, 2020:
+ Additional small cleanups from Christos Zoulas. awk is now
+ a little more robust about reporting I/O errors upon exit.
+
+January 31, 2020:
+ Merge PR #70, which avoids use of variable length arrays. Thanks
+ to GitHub user michaelforney. Fix issue #60 ({0} in interval
+ expressions doesn't work). Also get all tests working again.
+ Thanks to Arnold Robbins.
+
+January 24, 2020:
+ A number of small cleanups from Christos Zoulas. Add the close
+ on exec flag to files/pipes opened for redirection; courtesy of
+ Arnold Robbins.
+
+January 19, 2020:
+ If POSIXLY_CORRECT is set in the environment, then sub and gsub
+ use POSIX rules for multiple backslashes. This fixes Issue #66,
+ while maintaining backwards compatibility.
+
+January 9, 2020:
+ Input/output errors on closing files are now fatal instead of
+ mere warnings. Thanks to Martijn Dekker <martijn@inlv.org>.
+
+January 5, 2020:
+ Fix a bug in the concatenation of two string constants into
+ one done in the grammar. Fixes GitHub issue #61. Thanks
+ to GitHub user awkfan77 for pointing out the direction for
+ the fix. New test T.concat added to the test suite.
+ Fix a few memory leaks reported by valgrind, as well.
+
+December 27, 2019:
+ Fix a bug whereby a{0,3} could match four a's. Thanks to
+ "Anonymous AWK fan" for the report.
+
+December 11, 2019:
+ Further printf-related fixes for 32 bit systems.
+ Thanks again to Christos Zoulas.
+
+December 8, 2019:
+ Fix the return value of sprintf("%d") on 32 bit systems.
+ Thanks to Jim Lowe for the report and to Christos Zoulas
+ for the fix.
+
+November 10, 2019:
+ Convert a number of Boolean integer variables into
+ actual bools. Convert compile_time variable into an
+ enum and simplify some of the related code. Thanks
+ to Arnold Robbins.
+
+November 8, 2019:
+ Fix from Ori Bernstein to get UTF-8 characters instead of
+ bytes when FS = "". This is currently the only bit of
+ the One True Awk that understands multibyte characters.
+ From Arnold Robbins, apply some cleanups in the test suite.
+
+October 25, 2019:
+ More fixes and cleanups from NetBSD, courtesy of Christos
+ Zoulas. Merges PRs 54 and 55.
+
+October 24, 2019:
+ Import second round of code cleanups from NetBSD. Much thanks
+ to Christos Zoulas (GitHub user zoulasc). Merges PR 53.
+ Add an optimization for string concatenation, also from
+ Christos.
+
+October 17, 2019:
+ Import code cleanups from NetBSD. Much thanks to Christos
+ Zoulas (GitHub user zoulasc). Merges PR 51.
+
+October 6, 2019:
+ Import code from NetBSD awk that implements RS as a regular
+ expression.
+
+September 10, 2019:
+ Fixes for various array / memory overruns found via gcc's
+ -fsanitize=unknown. Thanks to Alexander Richardson (GitHub
+ user arichardson). Merges PRs 47 and 48.
+
+July 28, 2019:
+ Import grammar optimization from NetBSD: Two string constants
+ concatenated together get turned into a single string.
+
+July 26, 2019:
+ Support POSIX-specified C-style escape sequences "\a" (alarm)
+ and "\v" (vertical tab) in command line arguments and regular
+ expressions, further to the support for them in strings added on
+ Apr 9, 1989. These now no longer match as literal "a" and "v"
+ characters (as they don't on other awk implementations).
+ Thanks to Martijn Dekker.
+
+July 17, 2019:
+ Pull in a number of code cleanups and minor fixes from
+ Warner Losh's bsd-ota branch. The only user visible change
+ is the use of random(3) as the random number generator.
+ Thanks to Warner Losh for collecting all these fixes in
+ one easy place to get them from.
+
+July 16, 2019:
+ Fix field splitting to use FS value as of the time a record
+ was read or assigned to. Thanks to GitHub user Cody Mello (melloc)
+ for the fix. (Merged from his branch, via PR #42.) Updated
+ testdir/T.split per said PR as well.
+
+June 24, 2019:
+ Extract awktest.tar into testdir directory. Add some very
+ simple mechanics to the makefile for running the tests and
+ for cleaning up. No changes to awk itself.
+
+June 17, 2019:
+ Disallow deleting SYMTAB and its elements, which creates
+ use-after-free bugs. Thanks to GitHub user Cody Mello (melloc)
+ for the fix. (Merged from PR #43.)
+
+June 5, 2019:
+ Allow unmatched right parenthesis in a regular expression to
+ be treated literally. Fixes Issue #40. Thanks to GitHub user
+ Warner Losh (bsdimp) for the report. Thanks to Arnold Robbins
+ for the fix.
+
May 29,2019:
Fix check for command line arguments to no longer require that
first character after '=' not be another '='. Reverts change of
@@ -34,7 +257,7 @@ May 29,2019:
Apr 7, 2019:
Update awktest.tar(p.50) to use modern options to sort. Needed
for Android development. Thanks to GitHub user mohd-akram (Mohamed
- Akram). From Comment #33.
+ Akram). From Issue #33.
Mar 12, 2019:
Added very simplistic support for cross-compiling in the
@@ -54,7 +277,7 @@ Mar 3, 2019:
#12: Avoid undefined behaviour when using ctype(3) functions in
relex(). Thanks to GitHub user iamleot.
#31: Make getline handle numeric strings, and update FIXES. Thanks
- to GitHub user arnoldrobbins
+ to GitHub user arnoldrobbins.
#32: maketab: support build systems with read-only source. Thanks
to GitHub user enh.
@@ -159,10 +382,10 @@ Jun 12, 2011:
/pat/, \n /pat/ {...} is now legal, though bad style to use.
added checks to new -v code that permits -vnospace; thanks to
- ruslan ermilov for spotting this and providing the patch.
+ ruslan ermilov for spotting this and providing the patch.
removed fixed limit on number of open files; thanks to aleksey
- cheusov and christos zoulos.
+ cheusov and christos zoulos.
fixed day 1 bug that resurrected deleted elements of ARGV when
used as filenames (in lib.c).
@@ -180,10 +403,10 @@ May 1, 2011:
and arnold robbins, changed srand() to return the previous
seed (which is 1 on the first call of srand). the seed is
an Awkfloat internally though converted to unsigned int to
- pass to the library srand(). thanks, everyone.
+ pass to the library srand(). thanks, everyone.
fixed a subtle (and i hope low-probability) overflow error
- in fldbld, by adding space for one extra \0. thanks to
+ in fldbld, by adding space for one extra \0. thanks to
robert bassett for spotting this one and providing a fix.
removed the files related to compilation on windows. i no
@@ -220,7 +443,7 @@ Oct 8, 2008:
Oct 23, 2007:
minor fix in lib.c: increase inputFS to 100, change malloc
- for fields to n+1.
+ for fields to n+1.
fixed memory fault caused by out of order test in setsval.
@@ -267,7 +490,7 @@ Jan 17, 2006:
core dump on linux with BEGIN {nextfile}, now fixed.
- removed some #ifdef's in run.c and lex.c that appear to no
+ removed some #ifdef's in run.c and lex.c that appear to no
longer be necessary.
Apr 24, 2005:
@@ -281,8 +504,8 @@ Jan 14, 2005:
rethinking it.
Dec 31, 2004:
- prevent overflow of -f array in main, head off potential error in
- call of SYNTAX(), test malloc return in lib.c, all with thanks to
+ prevent overflow of -f array in main, head off potential error in
+ call of SYNTAX(), test malloc return in lib.c, all with thanks to
todd miller.
Dec 22, 2004:
@@ -310,8 +533,8 @@ Nov 22, 2003:
code known to man.
fixed a storage leak in call() that appears to have been there since
- 1983 or so -- a function without an explicit return that assigns a
- string to a parameter leaked a Cell. thanks to moinak ghosh for
+ 1983 or so -- a function without an explicit return that assigns a
+ string to a parameter leaked a Cell. thanks to moinak ghosh for
spotting this very subtle one.
Jul 31, 2003:
@@ -333,7 +556,7 @@ Jul 28, 2003:
radix character in programs and command line arguments regardless of
the locale; otherwise, the locale should prevail for input and output
of numbers. so it's intended to work that way.
-
+
i have rescinded the attempt to use strcoll in expanding shorthands in
regular expressions (cclenter). its properties are much too
surprising; for example [a-c] matches aAbBc in locale en_US but abBcC
@@ -397,7 +620,7 @@ Nov 29, 2002:
Jun 28, 2002:
modified run/format() and tran/getsval() to do a slightly better
job on using OFMT for output from print and CONVFMT for other
- number->string conversions, as promised by posix and done by
+ number->string conversions, as promised by posix and done by
gawk and mawk. there are still places where it doesn't work
right if CONVFMT is changed; by then the STR attribute of the
variable has been irrevocably set. thanks to arnold robbins for
@@ -429,7 +652,7 @@ Feb 10, 2002:
Jan 1, 2002:
fflush() or fflush("") flushes all files and pipes.
- length(arrayname) returns number of elements; thanks to
+ length(arrayname) returns number of elements; thanks to
arnold robbins for suggestion.
added a makefile.win to make it easier to build on windows.
@@ -479,7 +702,7 @@ July 5, 2000:
May 25, 2000:
yet another attempt at making 8-bit input work, with another
- band-aid in b.c (member()), and some (uschar) casts to head
+ band-aid in b.c (member()), and some (uschar) casts to head
off potential errors in subscripts (like isdigit). also
changed HAT to NCHARS-2. thanks again to santiago vila.
@@ -526,7 +749,7 @@ Apr 21, 1999:
the test case.)
Apr 16, 1999:
- with code kindly provided by Bruce Lilly, awk now parses
+ with code kindly provided by Bruce Lilly, awk now parses
/=/ and similar constructs more sensibly in more places.
Bruce also provided some helpful test cases.
@@ -583,7 +806,7 @@ Jan 13, 1999:
Oct 19, 1998:
fixed a couple of bugs in getrec: could fail to update $0
- after a getline var; because inputFS wasn't initialized,
+ after a getline var; because inputFS wasn't initialized,
could split $0 on every character, a misleading diversion.
fixed caching bug in makedfa: LRU was actually removing
@@ -731,7 +954,7 @@ May 2, 1996:
input file. (thanks to arnold robbins for inspiration and code).
small fixes to regexpr code: can now handle []], [[], and
- variants; [] is now a syntax error, rather than matching
+ variants; [] is now a syntax error, rather than matching
everything; [z-a] is now empty, not z. far from complete
or correct, however. (thanks to jeffrey friedl for pointing out
some awful behaviors.)
diff --git a/REGRESS b/REGRESS
index 7d3ded69d536..eb3b5d7ac70b 100755
--- a/REGRESS
+++ b/REGRESS
@@ -33,3 +33,7 @@ then
fi
REGRESS
+
+cd ..
+cd bugs-fixed
+REGRESS
diff --git a/awk.1 b/awk.1
index 18e99ad39496..b3698eb07d1a 100644
--- a/awk.1
+++ b/awk.1
@@ -7,6 +7,10 @@
.fi
.ft 1
..
+.de TF
+.IP "" "\w'\fB\\$1\ \ \fP'u"
+.PD 0
+..
.TH AWK 1
.CT 1 files prog_other
.SH NAME
@@ -48,7 +52,7 @@ matches the pattern.
Each line is matched against the
pattern portion of every pattern-action statement;
the associated action is performed for each matched pattern.
-The file name
+The file name
.B \-
means the standard input.
Any
@@ -90,7 +94,7 @@ A pattern-action statement has the form:
.IP
.IB pattern " { " action " }
.PP
-A missing
+A missing
.BI { " action " }
means print the line;
a missing pattern always matches.
@@ -209,7 +213,7 @@ or length of
if no argument.
.TP
.B rand
-random number on (0,1)
+random number on [0,1).
.TP
.B srand
sets seed for
@@ -217,7 +221,7 @@ sets seed for
and returns the previous seed.
.TP
.B int
-truncates to an integer value
+truncates to an integer value.
.TP
\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
the
@@ -225,12 +229,11 @@ the
substring of
.I s
that begins at position
-.I m
+.I m
counted from 1.
If no
-.IR m ,
-use the rest of the string
-.I
+.IR n ,
+use the rest of the string.
.TP
.BI index( s , " t" )
the position in
@@ -294,6 +297,25 @@ and
.B gsub
return the number of replacements.
.TP
+\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
+replaces instances of
+.I pat
+in
+.I target
+with
+.IR repl .
+If
+.I how
+is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
+.I how
+is a number indicating which occurrence to replace. If no
+.IR target ,
+use
+.BR $0 .
+Return the resulting string;
+.I target
+is not modified.
+.TP
.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
@@ -302,13 +324,35 @@ according to the
format
.IR fmt .
.TP
+.B systime()
+returns the current date and time as a standard
+``seconds since the epoch'' value.
+.TP
+.BI strftime( fmt ", " timestamp\^ )
+formats
+.I timestamp
+(a value in seconds since the epoch)
+according to
+.IR fmt ,
+which is a format string as supported by
+.IR strftime (3).
+Both
+.I timestamp
+and
+.I fmt
+may be omitted; if no
+.IR timestamp ,
+the current time of day is used, and if no
+.IR fmt ,
+a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
+.TP
.BI system( cmd )
executes
.I cmd
and returns its exit status. This will be \-1 upon error,
.IR cmd 's
exit status upon a normal exit,
-256 +
+256 +
.I sig
upon death-by-signal, where
.I sig
@@ -361,13 +405,24 @@ In all cases,
returns 1 for a successful input,
0 for end of file, and \-1 for an error.
.PP
+The functions
+.BR compl ,
+.BR and ,
+.BR or ,
+.BR xor ,
+.BR lshift ,
+and
+.B rshift
+perform the corresponding bitwise operations on their
+operands, which are first truncated to integer.
+.PP
Patterns are arbitrary Boolean combinations
(with
.BR "! || &&" )
of regular expressions and
relational expressions.
Regular expressions are as in
-.IR egrep ;
+.IR egrep ;
see
.IR grep (1).
Isolated regular expressions
@@ -479,6 +534,11 @@ the length of a string matched by
.TP
.B RS
input record separator (default newline).
+If empty, blank lines separate records.
+If more than one character long,
+.B RS
+is treated as a regular expression, and records are
+separated by text matching the expression.
.TP
.B RSTART
the start position of a string matched by
@@ -498,6 +558,16 @@ functions may be called recursively.
Parameters are local to the function; all other variables are global.
Thus local variables may be created by providing excess parameters in
the function definition.
+.SH ENVIRONMENT VARIABLES
+If
+.B POSIXLY_CORRECT
+is set in the environment, then
+.I awk
+follows the POSIX rules for
+.B sub
+and
+.B gsub
+with respect to consecutive backslashes and ampersands.
.SH EXAMPLES
.TP
.EX
@@ -542,8 +612,8 @@ BEGIN { # Simulate echo(1)
.fi
.EE
.SH SEE ALSO
-.IR grep (1),
-.IR lex (1),
+.IR grep (1),
+.IR lex (1),
.IR sed (1)
.br
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
@@ -554,8 +624,61 @@ There are no explicit conversions between numbers and strings.
To force an expression to be treated as a number add 0 to it;
to force it to be treated as a string concatenate
\&\f(CW""\fP to it.
-.br
+.PP
The scope rules for variables in functions are a botch;
the syntax is worse.
-.br
+.PP
Only eight-bit characters sets are handled correctly.
+.SH UNUSUAL FLOATING-POINT VALUES
+.I Awk
+was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
+and Infinity values, which are supported by all modern floating-point
+hardware.
+.PP
+Because
+.I awk
+uses
+.IR strtod (3)
+and
+.IR atof (3)
+to convert string values to double-precision floating-point values,
+modern C libraries also convert strings starting with
+.B inf
+and
+.B nan
+into infinity and NaN values respectively. This led to strange results,
+with something like this:
+.PP
+.EX
+.nf
+echo nancy | awk '{ print $1 + 0 }'
+.fi
+.EE
+.PP
+printing
+.B nan
+instead of zero.
+.PP
+.I Awk
+now follows GNU AWK, and prefilters string values before attempting
+to convert them to numbers, as follows:
+.TP
+.I "Hexadecimal values"
+Hexadecimal values (allowed since C99) convert to zero, as they did
+prior to C99.
+.TP
+.I "NaN values"
+The two strings
+.B +nan
+and
+.B \-nan
+(case independent) convert to NaN. No others do.
+(NaNs can have signs.)
+.TP
+.I "Infinity values"
+The two strings
+.B +inf
+and
+.B \-inf
+(case independent) convert to positive and negative infinity, respectively.
+No others do.
diff --git a/awk.h b/awk.h
index ddf246687969..51c00df9f279 100644
--- a/awk.h
+++ b/awk.h
@@ -23,6 +23,13 @@ THIS SOFTWARE.
****************************************************************/
#include <assert.h>
+#include <stdint.h>
+#include <stdbool.h>
+#if __STDC_VERSION__ <= 199901L
+#define noreturn
+#else
+#include <stdnoreturn.h>
+#endif
typedef double Awkfloat;
@@ -30,24 +37,34 @@ typedef double Awkfloat;
typedef unsigned char uschar;
-#define xfree(a) { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } }
+#define xfree(a) { if ((a) != NULL) { free((void *)(intptr_t)(a)); (a) = NULL; } }
+/*
+ * We sometimes cheat writing read-only pointers to NUL-terminate them
+ * and then put back the original value
+ */
+#define setptr(ptr, a) (*(char *)(intptr_t)(ptr)) = (a)
-#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for dprintf
+#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for DPRINTF
*/
#define DEBUG
#ifdef DEBUG
- /* uses have to be doubly parenthesized */
-# define dprintf(x) if (dbg) printf x
+# define DPRINTF(...) if (dbg) printf(__VA_ARGS__)
#else
-# define dprintf(x)
+# define DPRINTF(...)
#endif
-extern int compile_time; /* 1 if compiling, 0 if running */
-extern int safe; /* 0 => unsafe, 1 => safe */
+extern enum compile_states {
+ RUNNING,
+ COMPILING,
+ ERROR_PRINTING
+} compile_time;
+
+extern bool safe; /* false => unsafe, true => safe */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
+extern char EMPTY[]; /* this avoids -Wwritable-strings issues */
extern char **FS;
extern char **RS;
extern char **ORS;
@@ -64,13 +81,11 @@ extern Awkfloat *RLENGTH;
extern char *record; /* points to $0 */
extern int lineno; /* line number in awk program */
extern int errorflag; /* 1 if error has occurred */
-extern int donefld; /* 1 if record broken into fields */
-extern int donerec; /* 1 if record is valid (no fld has changed */
-extern char inputFS[]; /* FS at time of input, for field splitting */
-
+extern bool donefld; /* true if record broken into fields */
+extern bool donerec; /* true if record is valid (no fld has changed */
extern int dbg;
-extern char *patbeg; /* beginning of pattern matched */
+extern const char *patbeg; /* beginning of pattern matched */
extern int patlen; /* length of pattern matched. set in b.c */
/* Cell: all information about a variable or constant */
@@ -105,6 +120,7 @@ extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
extern Cell *subseploc; /* SUBSEP */
+extern Cell *symtabloc; /* SYMTAB */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */
@@ -134,6 +150,14 @@ extern Cell *subseploc; /* SUBSEP */
#define FTOUPPER 12
#define FTOLOWER 13
#define FFLUSH 14
+#define FAND 15
+#define FFOR 16
+#define FXOR 17
+#define FCOMPL 18
+#define FLSHIFT 19
+#define FRSHIFT 20
+#define FSYSTIME 21
+#define FSTRFTIME 22
/* Node: parse tree is made of nodes, with Cell's at bottom */
@@ -161,7 +185,7 @@ extern Node *nullnode;
#define CCOPY 6
#define CCON 5
#define CTEMP 4
-#define CNAME 3
+#define CNAME 3
#define CVAR 2
#define CFLD 1
#define CUNK 0
@@ -211,6 +235,7 @@ extern int pairstack[], paircnt;
#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
/* watch out in match(), etc. */
+#define HAT (NCHARS+2) /* matches ^ in regular expr */
#define NSTATES 32
typedef struct rrow {
@@ -224,16 +249,16 @@ typedef struct rrow {
} rrow;
typedef struct fa {
- uschar gototab[NSTATES][NCHARS];
- uschar out[NSTATES];
+ unsigned int **gototab;
+ uschar *out;
uschar *restr;
- int *posns[NSTATES];
- int anchor;
+ int **posns;
+ int state_count;
+ bool anchor;
int use;
int initstat;
int curstat;
int accept;
- int reset;
struct rrow re[1]; /* variable: actual size set by calling malloc */
} fa;
diff --git a/awkgram.y b/awkgram.y
index e4abeeddcb6a..f37073d1f9ac 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -32,8 +32,8 @@ int yywrap(void) { return(1); }
Node *beginloc = 0;
Node *endloc = 0;
-int infunc = 0; /* = 1 if in arglist or body of func */
-int inloop = 0; /* = 1 if in while, for, do */
+bool infunc = false; /* = true if in arglist or body of func */
+int inloop = 0; /* >= 1 if in while, for, do; can't be bool, since loops can nest */
char *curfname = 0; /* current function name */
Node *arglist = 0; /* list of args for current function */
%}
@@ -50,10 +50,10 @@ Node *arglist = 0; /* list of args for current function */
%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token <i> ARRAY
%token <i> MATCH NOTMATCH MATCHOP
-%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
+%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
-%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
-%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
+%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
+%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
@@ -71,6 +71,7 @@ Node *arglist = 0; /* list of args for current function */
%type <i> do st
%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type <i> subop print
+%type <cp> string
%right ASGNOP
%right '?'
@@ -79,7 +80,7 @@ Node *arglist = 0; /* list of args for current function */
%left AND
%left GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
-%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
+%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left REGEXPR VAR VARNF IVAR WHILE '('
@@ -181,8 +182,8 @@ pa_stat:
{ beginloc = linkum(beginloc, $3); $$ = 0; }
| XEND lbrace stmtlist '}'
{ endloc = linkum(endloc, $3); $$ = 0; }
- | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
- { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
+ | FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
+ { infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
;
pa_stats:
@@ -238,10 +239,10 @@ pattern:
$$ = op3($2, (Node *)1, $1, $3); }
| pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
- | pattern '|' GETLINE var {
+ | pattern '|' GETLINE var {
if (safe) SYNTAX("cmd | getline is unsafe");
else $$ = op3(GETLINE, $4, itonp($2), $1); }
- | pattern '|' GETLINE {
+ | pattern '|' GETLINE {
if (safe) SYNTAX("cmd | getline is unsafe");
else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
| pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
@@ -292,7 +293,7 @@ rparen:
;
simple_stmt:
- print prarg '|' term {
+ print prarg '|' term {
if (safe) SYNTAX("print | is unsafe");
else $$ = stat3($1, $2, itonp($3), $4); }
| print prarg APPEND term {
@@ -348,6 +349,11 @@ subop:
SUB | GSUB
;
+string:
+ STRING
+ | string STRING { $$ = catstr($1, $2); }
+ ;
+
term:
term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
| term '+' term { $$ = op2(ADD, $1, $3); }
@@ -369,6 +375,22 @@ term:
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
+ | GENSUB '(' reg_expr comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
+ | GENSUB '(' pattern comma pattern comma pattern ')'
+ { if (constnode($3))
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
+ else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
+ }
+ | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
+ | GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
+ { if (constnode($3))
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
+ else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
+ }
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
@@ -394,7 +416,7 @@ term:
| SPLIT '(' pattern comma varname ')'
{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
| SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
- | STRING { $$ = celltonode($1, CCON); }
+ | string { $$ = celltonode($1, CCON); }
| subop '(' reg_expr comma pattern ')'
{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
| subop '(' pattern comma pattern ')'
@@ -421,7 +443,7 @@ var:
| varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
| IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
| INDIRECT term { $$ = op1(INDIRECT, $2); }
- ;
+ ;
varlist:
/* nothing */ { arglist = $$ = 0; }
diff --git a/b.c b/b.c
index 37ea0a5bb2a7..f889ee57b3fb 100644
--- a/b.c
+++ b/b.c
@@ -32,10 +32,8 @@ THIS SOFTWARE.
#include <string.h>
#include <stdlib.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
-#define HAT (NCHARS+2) /* matches ^ in regular expr */
- /* NCHARS is 2**n */
#define MAXLIN 22
#define type(v) (v)->nobj /* badly overloaded here */
@@ -63,40 +61,99 @@ int maxsetvec = 0;
int rtok; /* next token in current re */
int rlxval;
-static uschar *rlxstr;
-static uschar *prestr; /* current position in current re */
-static uschar *lastre; /* origin of last re */
-static uschar *lastatom; /* origin of last Atom */
-static uschar *starttok;
-static uschar *basestr; /* starts with original, replaced during
+static const uschar *rlxstr;
+static const uschar *prestr; /* current position in current re */
+static const uschar *lastre; /* origin of last re */
+static const uschar *lastatom; /* origin of last Atom */
+static const uschar *starttok;
+static const uschar *basestr; /* starts with original, replaced during
repetition processing */
-static uschar *firstbasestr;
+static const uschar *firstbasestr;
static int setcnt;
static int poscnt;
-char *patbeg;
+const char *patbeg;
int patlen;
-#define NFA 20 /* cache this many dynamic fa's */
+#define NFA 128 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
+static int *
+intalloc(size_t n, const char *f)
+{
+ int *p = (int *) calloc(n, sizeof(int));
+ if (p == NULL)
+ overflo(f);
+ return p;
+}
+
+static void
+resizesetvec(const char *f)
+{
+ if (maxsetvec == 0)
+ maxsetvec = MAXLIN;
+ else
+ maxsetvec *= 4;
+ setvec = (int *) realloc(setvec, maxsetvec * sizeof(*setvec));
+ tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(*tmpset));
+ if (setvec == NULL || tmpset == NULL)
+ overflo(f);
+}
+
+static void
+resize_state(fa *f, int state)
+{
+ unsigned int **p;
+ uschar *p2;
+ int **p3;
+ int i, new_count;
+
+ if (++state < f->state_count)
+ return;
+
+ new_count = state + 10; /* needs to be tuned */
+
+ p = (unsigned int **) realloc(f->gototab, new_count * sizeof(f->gototab[0]));
+ if (p == NULL)
+ goto out;
+ f->gototab = p;
+
+ p2 = (uschar *) realloc(f->out, new_count * sizeof(f->out[0]));
+ if (p2 == NULL)
+ goto out;
+ f->out = p2;
+
+ p3 = (int **) realloc(f->posns, new_count * sizeof(f->posns[0]));
+ if (p3 == NULL)
+ goto out;
+ f->posns = p3;
+
+ for (i = f->state_count; i < new_count; ++i) {
+ f->gototab[i] = (unsigned int *) calloc(NCHARS, sizeof(**f->gototab));
+ if (f->gototab[i] == NULL)
+ goto out;
+ f->out[i] = 0;
+ f->posns[i] = NULL;
+ }
+ f->state_count = new_count;
+ return;
+out:
+ overflo(__func__);
+}
+
+fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
static int now = 1;
- if (setvec == 0) { /* first time through any RE */
- maxsetvec = MAXLIN;
- setvec = (int *) malloc(maxsetvec * sizeof(int));
- tmpset = (int *) malloc(maxsetvec * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("out of space initializing makedfa");
+ if (setvec == NULL) { /* first time through any RE */
+ resizesetvec(__func__);
}
- if (compile_time) /* a constant for sure */
+ if (compile_time != RUNNING) /* a constant for sure */
return mkdfa(s, anchor);
for (i = 0; i < nfatab; i++) /* is it there already? */
if (fatab[i]->anchor == anchor
@@ -124,13 +181,13 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
return pfa;
}
-fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
- /* anchor = 1 for anchored matches, else 0 */
+fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
+ /* anchor = true for anchored matches, else false */
{
Node *p, *p1;
fa *f;
- firstbasestr = (uschar *) s;
+ firstbasestr = (const uschar *) s;
basestr = firstbasestr;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
@@ -140,15 +197,14 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
- if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
- overflo("out of space for fa");
+ if ((f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
+ overflo(__func__);
f->accept = poscnt-1; /* penter has computed number of positions in re */
cfoll(f, p1); /* set up follow sets */
freetr(p1);
- if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
- overflo("out of space in makedfa");
- if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
- overflo("out of space in makedfa");
+ resize_state(f, 1);
+ f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
+ f->posns[1] = intalloc(1, __func__);
*f->posns[1] = 0;
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
@@ -160,28 +216,26 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
return f;
}
-int makeinit(fa *f, int anchor)
+int makeinit(fa *f, bool anchor)
{
int i, k;
f->curstat = 2;
f->out[2] = 0;
- f->reset = 0;
k = *(f->re[0].lfollow);
- xfree(f->posns[2]);
- if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
- overflo("out of space in makeinit");
- for (i=0; i <= k; i++) {
+ xfree(f->posns[2]);
+ f->posns[2] = intalloc(k + 1, __func__);
+ for (i = 0; i <= k; i++) {
(f->posns[2])[i] = (f->re[0].lfollow)[i];
}
if ((f->posns[2])[1] == f->accept)
f->out[2] = 1;
- for (i=0; i < NCHARS; i++)
+ for (i = 0; i < NCHARS; i++)
f->gototab[2][i] = 0;
f->curstat = cgoto(f, 2, HAT);
if (anchor) {
*f->posns[2] = k-1; /* leave out position 0 */
- for (i=0; i < k; i++) {
+ for (i = 0; i < k; i++) {
(f->posns[0])[i] = (f->posns[2])[i];
}
@@ -211,6 +265,8 @@ void penter(Node *p) /* set up parent pointers and leaf indices */
parent(left(p)) = p;
parent(right(p)) = p;
break;
+ case ZERO:
+ break;
default: /* can't happen */
FATAL("can't happen: unknown type %d in penter", type(p));
break;
@@ -225,6 +281,7 @@ void freetr(Node *p) /* free parse tree */
xfree(p);
break;
UNARY
+ case ZERO:
freetr(left(p));
xfree(p);
break;
@@ -243,13 +300,13 @@ void freetr(Node *p) /* free parse tree */
/* in the parsing of regular expressions, metacharacters like . have */
/* to be seen literally; \056 is not a metacharacter. */
-int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
+int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
{ /* only pick up one 8-bit byte (2 chars) */
- uschar *p;
+ const uschar *p;
int n = 0;
int i;
- for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
+ for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
if (isdigit(*p))
n = 16 * n + *p - '0';
else if (*p >= 'a' && *p <= 'f')
@@ -257,16 +314,16 @@ int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
else if (*p >= 'A' && *p <= 'F')
n = 16 * n + *p - 'A' + 10;
}
- *pp = (uschar *) p;
+ *pp = p;
return n;
}
#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
-int quoted(uschar **pp) /* pick up next thing after a \\ */
+int quoted(const uschar **pp) /* pick up next thing after a \\ */
/* and increment *pp */
{
- uschar *p = *pp;
+ const uschar *p = *pp;
int c;
if ((c = *p++) == 't')
@@ -279,6 +336,10 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */
c = '\r';
else if (c == 'b')
c = '\b';
+ else if (c == 'v')
+ c = '\v';
+ else if (c == 'a')
+ c = '\a';
else if (c == '\\')
c = '\\';
else if (c == 'x') { /* hexadecimal goo follows */
@@ -300,13 +361,13 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */
char *cclenter(const char *argp) /* add a character class */
{
int i, c, c2;
- uschar *p = (uschar *) argp;
- uschar *op, *bp;
- static uschar *buf = 0;
+ const uschar *op, *p = (const uschar *) argp;
+ uschar *bp;
+ static uschar *buf = NULL;
static int bufsz = 100;
op = p;
- if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space for character class [%.10s...] 1", p);
bp = buf;
for (i = 0; (c = *p++) != 0; ) {
@@ -338,14 +399,14 @@ char *cclenter(const char *argp) /* add a character class */
i++;
}
*bp = 0;
- dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
+ DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf);
xfree(op);
return (char *) tostring((char *) buf);
}
void overflo(const char *s)
{
- FATAL("regular expression too big: %.30s...", s);
+ FATAL("regular expression too big: out of space in %.30s...", s);
}
void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
@@ -359,18 +420,13 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
f->re[info(v)].ltype = type(v);
f->re[info(v)].lval.np = right(v);
while (f->accept >= maxsetvec) { /* guessing here! */
- maxsetvec *= 4;
- setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("out of space in cfoll()");
+ resizesetvec(__func__);
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
- if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
- overflo("out of space building follow set");
+ p = intalloc(setcnt + 1, __func__);
f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--)
@@ -385,6 +441,8 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
cfoll(f,left(v));
cfoll(f,right(v));
break;
+ case ZERO:
+ break;
default: /* can't happen */
FATAL("can't happen: unknown type %d in cfoll", type(v));
}
@@ -400,11 +458,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
LEAF
lp = info(p); /* look for high-water mark of subscripts */
while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
- maxsetvec *= 4;
- setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("out of space in first()");
+ resizesetvec(__func__);
}
if (type(p) == EMPTYRE) {
setvec[lp] = 0;
@@ -416,9 +470,10 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
}
if (type(p) == CCL && (*(char *) right(p)) == '\0')
return(0); /* empty CCL */
- else return(1);
+ return(1);
case PLUS:
- if (first(left(p)) == 0) return(0);
+ if (first(left(p)) == 0)
+ return(0);
return(1);
case STAR:
case QUEST:
@@ -431,6 +486,8 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
b = first(right(p));
if (first(left(p)) == 0 || b == 0) return(0);
return(1);
+ case ZERO:
+ return 0;
}
FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
return(-1);
@@ -469,7 +526,7 @@ void follow(Node *v) /* collects leaves that can follow v into setvec */
int member(int c, const char *sarg) /* is c in s? */
{
- uschar *s = (uschar *) sarg;
+ const uschar *s = (const uschar *) sarg;
while (*s)
if (c == *s++)
@@ -480,9 +537,11 @@ int member(int c, const char *sarg) /* is c in s? */
int match(fa *f, const char *p0) /* shortest match ? */
{
int s, ns;
- uschar *p = (uschar *) p0;
+ const uschar *p = (const uschar *) p0;
+
+ s = f->initstat;
+ assert (s < f->state_count);
- s = f->reset ? makeinit(f,0) : f->initstat;
if (f->out[s])
return(1);
do {
@@ -500,17 +559,13 @@ int match(fa *f, const char *p0) /* shortest match ? */
int pmatch(fa *f, const char *p0) /* longest match, for sub */
{
int s, ns;
- uschar *p = (uschar *) p0;
- uschar *q;
- int i, k;
+ const uschar *p = (const uschar *) p0;
+ const uschar *q;
- /* s = f->reset ? makeinit(f,1) : f->initstat; */
- if (f->reset) {
- f->initstat = s = makeinit(f,1);
- } else {
- s = f->initstat;
- }
- patbeg = (char *) p;
+ s = f->initstat;
+ assert(s < f->state_count);
+
+ patbeg = (const char *)p;
patlen = -1;
do {
q = p;
@@ -522,9 +577,12 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
s = ns;
else
s = cgoto(f, s, *q);
+
+ assert(s < f->state_count);
+
if (s == 1) { /* no transition */
if (patlen >= 0) {
- patbeg = (char *) p;
+ patbeg = (const char *) p;
return(1);
}
else
@@ -534,41 +592,25 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
if (f->out[s])
patlen = q-p-1; /* don't count $ */
if (patlen >= 0) {
- patbeg = (char *) p;
+ patbeg = (const char *) p;
return(1);
}
nextin:
s = 2;
- if (f->reset) {
- for (i = 2; i <= f->curstat; i++)
- xfree(f->posns[i]);
- k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
- overflo("out of space in pmatch");
- for (i = 0; i <= k; i++)
- (f->posns[2])[i] = (f->posns[0])[i];
- f->initstat = f->curstat = 2;
- f->out[2] = f->out[0];
- for (i = 0; i < NCHARS; i++)
- f->gototab[2][i] = 0;
- }
- } while (*p++ != 0);
+ } while (*p++);
return (0);
}
int nematch(fa *f, const char *p0) /* non-empty match, for sub */
{
int s, ns;
- uschar *p = (uschar *) p0;
- uschar *q;
- int i, k;
+ const uschar *p = (const uschar *) p0;
+ const uschar *q;
- /* s = f->reset ? makeinit(f,1) : f->initstat; */
- if (f->reset) {
- f->initstat = s = makeinit(f,1);
- } else {
- s = f->initstat;
- }
+ s = f->initstat;
+ assert(s < f->state_count);
+
+ patbeg = (const char *)p;
patlen = -1;
while (*p) {
q = p;
@@ -582,7 +624,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen > 0) {
- patbeg = (char *) p;
+ patbeg = (const char *) p;
return(1);
} else
goto nnextin; /* no nonempty match */
@@ -591,35 +633,110 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
if (f->out[s])
patlen = q-p-1; /* don't count $ */
if (patlen > 0 ) {
- patbeg = (char *) p;
+ patbeg = (const char *) p;
return(1);
}
nnextin:
s = 2;
- if (f->reset) {
- for (i = 2; i <= f->curstat; i++)
- xfree(f->posns[i]);
- k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
- overflo("out of state space");
- for (i = 0; i <= k; i++)
- (f->posns[2])[i] = (f->posns[0])[i];
- f->initstat = f->curstat = 2;
- f->out[2] = f->out[0];
- for (i = 0; i < NCHARS; i++)
- f->gototab[2][i] = 0;
- }
p++;
}
return (0);
}
+
+/*
+ * NAME
+ * fnematch
+ *
+ * DESCRIPTION
+ * A stream-fed version of nematch which transfers characters to a
+ * null-terminated buffer. All characters up to and including the last
+ * character of the matching text or EOF are placed in the buffer. If
+ * a match is found, patbeg and patlen are set appropriately.
+ *
+ * RETURN VALUES
+ * false No match found.
+ * true Match found.
+ */
+
+bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
+{
+ char *buf = *pbuf;
+ int bufsize = *pbufsize;
+ int c, i, j, k, ns, s;
+
+ s = pfa->initstat;
+ patlen = 0;
+
+ /*
+ * All indices relative to buf.
+ * i <= j <= k <= bufsize
+ *
+ * i: origin of active substring
+ * j: current character
+ * k: destination of next getc()
+ */
+ i = -1, k = 0;
+ do {
+ j = i++;
+ do {
+ if (++j == k) {
+ if (k == bufsize)
+ if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
+ FATAL("stream '%.30s...' too long", buf);
+ buf[k++] = (c = getc(f)) != EOF ? c : 0;
+ }
+ c = (uschar)buf[j];
+ /* assert(c < NCHARS); */
+
+ if ((ns = pfa->gototab[s][c]) != 0)
+ s = ns;
+ else
+ s = cgoto(pfa, s, c);
+
+ if (pfa->out[s]) { /* final state */
+ patlen = j - i + 1;
+ if (c == 0) /* don't count $ */
+ patlen--;
+ }
+ } while (buf[j] && s != 1);
+ s = 2;
+ } while (buf[i] && !patlen);
+
+ /* adjbuf() may have relocated a resized buffer. Inform the world. */
+ *pbuf = buf;
+ *pbufsize = bufsize;
+
+ if (patlen) {
+ patbeg = (char *) buf + i;
+ /*
+ * Under no circumstances is the last character fed to
+ * the automaton part of the match. It is EOF's nullbyte,
+ * or it sent the automaton into a state with no further
+ * transitions available (s==1), or both. Room for a
+ * terminating nullbyte is guaranteed.
+ *
+ * ungetc any chars after the end of matching text
+ * (except for EOF's nullbyte, if present) and null
+ * terminate the buffer.
+ */
+ do
+ if (buf[--k] && ungetc(buf[k], f) == EOF)
+ FATAL("unable to ungetc '%c'", buf[k]);
+ while (k > i + patlen);
+ buf[k] = '\0';
+ return true;
+ }
+ else
+ return false;
+}
+
Node *reparse(const char *p) /* parses regular expression pointed to by p */
{ /* uses relex() to scan regular expression */
Node *np;
- dprintf( ("reparse <%s>\n", p) );
- lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
+ DPRINTF("reparse <%s>\n", p);
+ lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
rtok = relex();
/* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0') {
@@ -659,12 +776,12 @@ Node *primary(void)
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
- np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
+ np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
lastatom = starttok;
rtok = relex();
return (unary(np));
case NCCL:
- np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
+ np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
lastatom = starttok;
rtok = relex();
return (unary(np));
@@ -730,6 +847,9 @@ Node *unary(Node *np)
case QUEST:
rtok = relex();
return (unary(op2(QUEST, np, NIL)));
+ case ZERO:
+ rtok = relex();
+ return (unary(op2(ZERO, np, NIL)));
default:
return (np);
}
@@ -765,7 +885,7 @@ int (xisblank)(int c)
#endif
-struct charclass {
+static const struct charclass {
const char *cc_name;
int cc_namelen;
int (*cc_func)(int);
@@ -801,10 +921,10 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int i, j;
uschar *buf = 0;
int ret = 1;
- int init_q = (firstnum==0); /* first added char will be ? */
+ int init_q = (firstnum == 0); /* first added char will be ? */
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
int prefix_length = reptok - basestr; /* prefix includes first rep */
- int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */
+ int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
int size = prefix_length + suffix_length;
if (firstnum > 1) { /* add room for reps 2 through firstnum */
@@ -819,7 +939,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
} else if (special_case == REPEAT_ZERO) {
size += 2; /* just a null ERE: () */
}
- if ((buf = (uschar *) malloc(size+1)) == NULL)
+ if ((buf = (uschar *) malloc(size + 1)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
memcpy(buf, basestr, prefix_length); /* copy prefix */
j = prefix_length;
@@ -828,15 +948,16 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
buf[j++] = '(';
buf[j++] = ')';
}
- for (i=1; i < firstnum; i++) { /* copy x reps */
+ for (i = 1; i < firstnum; i++) { /* copy x reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
}
if (special_case == REPEAT_PLUS_APPENDED) {
buf[j++] = '+';
} else if (special_case == REPEAT_WITH_Q) {
- if (init_q) buf[j++] = '?';
- for (i=0; i < n_q_reps; i++) { /* copy x? reps */
+ if (init_q)
+ buf[j++] = '?';
+ for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
buf[j++] = '?';
@@ -905,13 +1026,15 @@ int relex(void) /* lexical analyzer for reparse */
{
int c, n;
int cflag;
- static uschar *buf = 0;
+ static uschar *buf = NULL;
static int bufsz = 100;
uschar *bp;
- struct charclass *cc;
+ const struct charclass *cc;
int i;
- int num, m, commafound, digitfound;
+ int num, m;
+ bool commafound, digitfound;
const uschar *startreptok;
+ static int parens = 0;
rescan:
starttok = prestr;
@@ -925,17 +1048,26 @@ rescan:
case '\0': prestr--; return '\0';
case '^':
case '$':
+ return c;
case '(':
- case ')':
+ parens++;
return c;
+ case ')':
+ if (parens) {
+ parens--;
+ return c;
+ }
+ /* unmatched close parenthesis; per POSIX, treat as literal */
+ rlxval = c;
+ return CHAR;
case '\\':
rlxval = quoted(&prestr);
return CHAR;
default:
rlxval = c;
return CHAR;
- case '[':
- if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
+ case '[':
+ if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
bp = buf;
if (*prestr == '^') {
@@ -975,6 +1107,12 @@ rescan:
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
+ /* escape backslash */
+ if (i == '\\') {
+ *bp++ = '\\';
+ n++;
+ }
+
*bp++ = i;
n++;
}
@@ -1034,8 +1172,8 @@ rescan:
if (isdigit(*(prestr))) {
num = 0; /* Process as a repetition */
n = -1; m = -1;
- commafound = 0;
- digitfound = 0;
+ commafound = false;
+ digitfound = false;
startreptok = prestr-1;
/* Remember start of previous atom here ? */
} else { /* just a { char, not a repetition */
@@ -1047,15 +1185,17 @@ rescan:
if (commafound) {
if (digitfound) { /* {n,m} */
m = num;
- if (m<n)
+ if (m < n)
FATAL("illegal repetition expression: class %.20s",
lastre);
- if ((n==0) && (m==1)) {
+ if (n == 0 && m == 1) {
return QUEST;
}
} else { /* {n,} */
- if (n==0) return STAR;
- if (n==1) return PLUS;
+ if (n == 0)
+ return STAR;
+ else if (n == 1)
+ return PLUS;
}
} else {
if (digitfound) { /* {n} same as {n,n} */
@@ -1068,8 +1208,8 @@ rescan:
}
if (repeat(starttok, prestr-starttok, lastatom,
startreptok - lastatom, n, m) > 0) {
- if ((n==0) && (m==0)) {
- return EMPTYRE;
+ if (n == 0 && m == 0) {
+ return ZERO;
}
/* must rescan input for next token */
goto rescan;
@@ -1082,15 +1222,15 @@ rescan:
lastre);
} else if (isdigit(c)) {
num = 10 * num + c - '0';
- digitfound = 1;
+ digitfound = true;
} else if (c == ',') {
if (commafound)
FATAL("illegal repetition expression: class %.20s",
lastre);
/* looking for {n,} or {n,m} */
- commafound = 1;
+ commafound = true;
n = num;
- digitfound = 0; /* reset */
+ digitfound = false; /* reset */
num = 0;
} else {
FATAL("illegal repetition expression: class %.20s",
@@ -1103,20 +1243,17 @@ rescan:
int cgoto(fa *f, int s, int c)
{
- int i, j, k;
int *p, *q;
+ int i, j, k;
assert(c == HAT || c < NCHARS);
while (f->accept >= maxsetvec) { /* guessing here! */
- maxsetvec *= 4;
- setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("out of space in cgoto()");
+ resizesetvec(__func__);
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
+ resize_state(f, s);
/* compute positions of gototab[s,c] into setvec */
p = f->posns[s];
for (i = 1; i <= *p; i++) {
@@ -1130,11 +1267,7 @@ int cgoto(fa *f, int s, int c)
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
if (q[j] >= maxsetvec) {
- maxsetvec *= 4;
- setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == 0 || tmpset == 0)
- overflo("cgoto overflow");
+ resizesetvec(__func__);
}
if (setvec[q[j]] == 0) {
setcnt++;
@@ -1151,6 +1284,7 @@ int cgoto(fa *f, int s, int c)
if (setvec[i]) {
tmpset[j++] = i;
}
+ resize_state(f, f->curstat > s ? f->curstat : s);
/* tmpset == previous state? */
for (i = 1; i <= f->curstat; i++) {
p = f->posns[i];
@@ -1160,27 +1294,23 @@ int cgoto(fa *f, int s, int c)
if (tmpset[j] != p[j])
goto different;
/* setvec is state i */
- f->gototab[s][c] = i;
+ if (c != HAT)
+ f->gototab[s][c] = i;
return i;
different:;
}
/* add tmpset to current set of states */
- if (f->curstat >= NSTATES-1) {
- f->curstat = 2;
- f->reset = 1;
- for (i = 2; i < NSTATES; i++)
- xfree(f->posns[i]);
- } else
- ++(f->curstat);
+ ++(f->curstat);
+ resize_state(f, f->curstat);
for (i = 0; i < NCHARS; i++)
f->gototab[f->curstat][i] = 0;
xfree(f->posns[f->curstat]);
- if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
- overflo("out of space in cgoto");
+ p = intalloc(setcnt + 1, __func__);
f->posns[f->curstat] = p;
- f->gototab[s][c] = f->curstat;
+ if (c != HAT)
+ f->gototab[s][c] = f->curstat;
for (i = 0; i <= setcnt; i++)
p[i] = tmpset[i];
if (setvec[f->accept])
@@ -1197,13 +1327,18 @@ void freefa(fa *f) /* free a finite automaton */
if (f == NULL)
return;
+ for (i = 0; i < f->state_count; i++)
+ xfree(f->gototab[i])
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
- xfree((f->re[i].lval.np));
+ xfree(f->re[i].lval.np);
}
xfree(f->restr);
+ xfree(f->out);
+ xfree(f->posns);
+ xfree(f->gototab);
xfree(f);
}
diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok
index 608b4fa48666..75e1e3d00446 100644
--- a/bugs-fixed/missing-precision.ok
+++ b/bugs-fixed/missing-precision.ok
@@ -1,2 +1,2 @@
-./a.out: not enough args in printf(%*s)
+../a.out: not enough args in printf(%*s)
source line number 1
diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok
index 71c860468cc0..de97f8b27def 100644
--- a/bugs-fixed/negative-nf.ok
+++ b/bugs-fixed/negative-nf.ok
@@ -1,2 +1,2 @@
-./a.out: cannot set NF to a negative value
+../a.out: cannot set NF to a negative value
source line number 1
diff --git a/lex.c b/lex.c
index ad8e878a247d..d471ac2944a9 100644
--- a/lex.c
+++ b/lex.c
@@ -27,10 +27,10 @@ THIS SOFTWARE.
#include <string.h>
#include <ctype.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
extern YYSTYPE yylval;
-extern int infunc;
+extern bool infunc;
int lineno = 1;
int bracecnt = 0;
@@ -43,13 +43,15 @@ typedef struct Keyword {
int type;
} Keyword;
-Keyword keywords[] ={ /* keep sorted: binary searched */
+const Keyword keywords[] = { /* keep sorted: binary searched */
{ "BEGIN", XBEGIN, XBEGIN },
{ "END", XEND, XEND },
{ "NF", VARNF, VARNF },
+ { "and", FAND, BLTIN },
{ "atan2", FATAN, BLTIN },
{ "break", BREAK, BREAK },
{ "close", CLOSE, CLOSE },
+ { "compl", FCOMPL, BLTIN },
{ "continue", CONTINUE, CONTINUE },
{ "cos", FCOS, BLTIN },
{ "delete", DELETE, DELETE },
@@ -61,6 +63,7 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
{ "for", FOR, FOR },
{ "func", FUNC, FUNC },
{ "function", FUNC, FUNC },
+ { "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB },
{ "if", IF, IF },
@@ -69,36 +72,42 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
{ "int", FINT, BLTIN },
{ "length", FLENGTH, BLTIN },
{ "log", FLOG, BLTIN },
+ { "lshift", FLSHIFT, BLTIN },
{ "match", MATCHFCN, MATCHFCN },
{ "next", NEXT, NEXT },
{ "nextfile", NEXTFILE, NEXTFILE },
+ { "or", FFOR, BLTIN },
{ "print", PRINT, PRINT },
{ "printf", PRINTF, PRINTF },
{ "rand", FRAND, BLTIN },
{ "return", RETURN, RETURN },
+ { "rshift", FRSHIFT, BLTIN },
{ "sin", FSIN, BLTIN },
{ "split", SPLIT, SPLIT },
{ "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN },
+ { "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN },
+ { "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE },
+ { "xor", FXOR, BLTIN },
};
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
-int peek(void)
+static int peek(void)
{
int c = input();
unput(c);
return c;
}
-int gettok(char **pbuf, int *psz) /* get next input token */
+static int gettok(char **pbuf, int *psz) /* get next input token */
{
int c, retc;
char *buf = *pbuf;
@@ -136,7 +145,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */
if (bp-buf >= sz)
if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
FATAL( "out of space for number %.10s...", buf );
- if (isdigit(c) || c == 'e' || c == 'E'
+ if (isdigit(c) || c == 'e' || c == 'E'
|| c == '.' || c == '+' || c == '-')
*bp++ = c;
else {
@@ -148,7 +157,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */
strtod(buf, &rem); /* parse the number */
if (rem == buf) { /* it wasn't a valid number at all */
buf[1] = 0; /* return one character as token */
- retc = buf[0]; /* character is its own type */
+ retc = (uschar)buf[0]; /* character is its own type */
unputstr(rem+1); /* put rest back for later */
} else { /* some prefix was a number */
unputstr(rem); /* put rest back for later */
@@ -164,23 +173,23 @@ int gettok(char **pbuf, int *psz) /* get next input token */
int word(char *);
int string(void);
int regexpr(void);
-int sc = 0; /* 1 => return a } right now */
-int reg = 0; /* 1 => return a REGEXPR now */
+bool sc = false; /* true => return a } right now */
+bool reg = false; /* true => return a REGEXPR now */
int yylex(void)
{
int c;
- static char *buf = 0;
+ static char *buf = NULL;
static int bufsize = 5; /* BUG: setting this small causes core dump! */
- if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
+ if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
FATAL( "out of space in yylex" );
if (sc) {
- sc = 0;
+ sc = false;
RET('}');
}
if (reg) {
- reg = 0;
+ reg = false;
return regexpr();
}
for (;;) {
@@ -190,11 +199,18 @@ int yylex(void)
if (isalpha(c) || c == '_')
return word(buf);
if (isdigit(c)) {
- yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
+ char *cp = tostring(buf);
+ double result;
+
+ if (is_number(cp, & result))
+ yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
+ else
+ yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
+ free(cp);
/* should this also have STR set? */
RET(NUMBER);
}
-
+
yylval.i = c;
switch (c) {
case '\n': /* {EOL} */
@@ -208,6 +224,11 @@ int yylex(void)
while ((c = input()) != '\n' && c != 0)
;
unput(c);
+ /*
+ * Next line is a hack, it compensates for
+ * unput's treatment of \n.
+ */
+ lineno++;
break;
case ';':
RET(';');
@@ -225,7 +246,7 @@ int yylex(void)
case '&':
if (peek() == '&') {
input(); RET(AND);
- } else
+ } else
RET('&');
case '|':
if (peek() == '|') {
@@ -323,11 +344,11 @@ int yylex(void)
unputstr(buf);
RET(INDIRECT);
}
-
+
case '}':
if (--bracecnt < 0)
SYNTAX( "extra }" );
- sc = 1;
+ sc = true;
RET(';');
case ']':
if (--brackcnt < 0)
@@ -346,10 +367,10 @@ int yylex(void)
case '(':
parencnt++;
RET('(');
-
+
case '"':
return string(); /* BUG: should be like tran.c ? */
-
+
default:
RET(c);
}
@@ -360,10 +381,10 @@ int string(void)
{
int c, n;
char *s, *bp;
- static char *buf = 0;
+ static char *buf = NULL;
static int bufsz = 500;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
@@ -381,14 +402,15 @@ int string(void)
case '\\':
c = input();
switch (c) {
+ case '\n': break;
case '"': *bp++ = '"'; break;
- case 'n': *bp++ = '\n'; break;
+ case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break;
case 'f': *bp++ = '\f'; break;
case 'r': *bp++ = '\r'; break;
case 'b': *bp++ = '\b'; break;
case 'v': *bp++ = '\v'; break;
- case 'a': *bp++ = '\007'; break;
+ case 'a': *bp++ = '\a'; break;
case '\\': *bp++ = '\\'; break;
case '0': case '1': case '2': /* octal: \d \dd \ddd */
@@ -419,7 +441,7 @@ int string(void)
break;
}
- default:
+ default:
*bp++ = c;
break;
}
@@ -429,15 +451,16 @@ int string(void)
break;
}
}
- *bp = 0;
+ *bp = 0;
s = tostring(buf);
- *bp++ = ' '; *bp++ = 0;
+ *bp++ = ' '; *bp++ = '\0';
yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
+ free(s);
RET(STRING);
}
-int binsearch(char *w, Keyword *kp, int n)
+static int binsearch(char *w, const Keyword *kp, int n)
{
int cond, low, mid, high;
@@ -455,15 +478,14 @@ int binsearch(char *w, Keyword *kp, int n)
return -1;
}
-int word(char *w)
+int word(char *w)
{
- Keyword *kp;
+ const Keyword *kp;
int c, n;
n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
-/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
- kp = keywords + n;
if (n != -1) { /* found in table */
+ kp = keywords + n;
yylval.i = kp->sub;
switch (kp->type) { /* special handling */
case BLTIN:
@@ -501,17 +523,17 @@ int word(char *w)
void startreg(void) /* next call to yylex will return a regular expression */
{
- reg = 1;
+ reg = true;
}
int regexpr(void)
{
int c;
- static char *buf = 0;
+ static char *buf = NULL;
static int bufsz = 500;
char *bp;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for rex expr");
bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) {
@@ -519,11 +541,11 @@ int regexpr(void)
FATAL("out of space for reg expr %.10s...", buf);
if (c == '\n') {
*bp = '\0';
- SYNTAX( "newline in regular expression %.10s...", buf );
+ SYNTAX( "newline in regular expression %.10s...", buf );
unput('\n');
break;
} else if (c == '\\') {
- *bp++ = '\\';
+ *bp++ = '\\';
*bp++ = input();
} else {
*bp++ = c;
@@ -543,7 +565,7 @@ char ebuf[300];
char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf;
-FILE *yyin = 0;
+FILE *yyin = NULL;
int input(void) /* get next lexical input character */
{
@@ -570,6 +592,8 @@ int input(void) /* get next lexical input character */
void unput(int c) /* put lexical character back on input */
{
+ if (c == '\n')
+ lineno--;
if (yysptr >= yysbuf + sizeof(yysbuf))
FATAL("pushed back too much: %.20s...", yysbuf);
*yysptr++ = c;
diff --git a/lib.c b/lib.c
index a365245a0e1a..18adbd2d1fd6 100644
--- a/lib.c
+++ b/lib.c
@@ -25,43 +25,49 @@ THIS SOFTWARE.
#define DEBUG
#include <stdio.h>
#include <string.h>
+#include <strings.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
#include "awk.h"
-#include "ytab.h"
+char EMPTY[] = { '\0' };
FILE *infile = NULL;
-char *file = "";
+bool innew; /* true = infile has not been read by readrec */
+char *file = EMPTY;
char *record;
int recsize = RECSIZE;
char *fields;
int fieldssize = RECSIZE;
Cell **fldtab; /* pointers to Cells */
-char inputFS[100] = " ";
+static size_t len_inputFS = 0;
+static char *inputFS = NULL; /* FS at time of input, for field splitting */
#define MAXFLD 2
int nfields = MAXFLD; /* last allocated slot for $i */
-int donefld; /* 1 = implies rec broken into fields */
-int donerec; /* 1 = record is valid (no flds have changed) */
+bool donefld; /* true = implies rec broken into fields */
+bool donerec; /* true = record is valid (no flds have changed) */
int lastfld = 0; /* last used field */
int argno = 1; /* current input argument number */
extern Awkfloat *ARGC;
-static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
-static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
+static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
void recinit(unsigned int n)
{
if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
- || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
- || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
+ || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
+ || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
FATAL("out of space for $0 and fields");
+ *record = '\0';
*fldtab[0] = dollar0;
fldtab[0]->sval = record;
fldtab[0]->nval = tostring("0");
@@ -74,11 +80,11 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
int i;
for (i = n1; i <= n2; i++) {
- fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
+ fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
if (fldtab[i] == NULL)
FATAL("out of space in makefields %d", i);
*fldtab[i] = dollar1;
- sprintf(temp, "%d", i);
+ snprintf(temp, sizeof(temp), "%d", i);
fldtab[i]->nval = tostring(temp);
}
}
@@ -102,11 +108,36 @@ void initgetrec(void)
argno++;
}
infile = stdin; /* no filenames, so use stdin */
+ innew = true;
}
-static int firsttime = 1;
+/*
+ * POSIX specifies that fields are supposed to be evaluated as if they were
+ * split using the value of FS at the time that the record's value ($0) was
+ * read.
+ *
+ * Since field-splitting is done lazily, we save the current value of FS
+ * whenever a new record is read in (implicitly or via getline), or when
+ * a new value is assigned to $0.
+ */
+void savefs(void)
+{
+ size_t len;
+ if ((len = strlen(getsval(fsloc))) < len_inputFS) {
+ strcpy(inputFS, *FS); /* for subsequent field splitting */
+ return;
+ }
-int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
+ len_inputFS = len + 1;
+ inputFS = (char *) realloc(inputFS, len_inputFS);
+ if (inputFS == NULL)
+ FATAL("field separator %.10s... is too long", *FS);
+ memcpy(inputFS, *FS, len_inputFS);
+}
+
+static bool firsttime = true;
+
+int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
{ /* note: cares whether buf == record */
int c;
char *buf = *pbuf;
@@ -114,19 +145,20 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
int bufsize = *pbufsize, savebufsize = bufsize;
if (firsttime) {
- firsttime = 0;
+ firsttime = false;
initgetrec();
}
- dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
- *RS, *FS, *ARGC, *FILENAME) );
+ DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
+ *RS, *FS, *ARGC, *FILENAME);
if (isrecord) {
- donefld = 0;
- donerec = 1;
+ donefld = false;
+ donerec = true;
+ savefs();
}
saveb0 = buf[0];
buf[0] = 0;
while (argno < *ARGC || infile == stdin) {
- dprintf( ("argno=%d, file=|%s|\n", argno, file) );
+ DPRINTF("argno=%d, file=|%s|\n", argno, file);
if (infile == NULL) { /* have to open a new file */
file = getargv(argno);
if (file == NULL || *file == '\0') { /* deleted or zapped */
@@ -139,22 +171,26 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
continue;
}
*FILENAME = file;
- dprintf( ("opening file %s\n", file) );
+ DPRINTF("opening file %s\n", file);
if (*file == '-' && *(file+1) == '\0')
infile = stdin;
else if ((infile = fopen(file, "r")) == NULL)
FATAL("can't open file %s", file);
setfval(fnrloc, 0.0);
}
- c = readrec(&buf, &bufsize, infile);
+ c = readrec(&buf, &bufsize, infile, innew);
+ if (innew)
+ innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
+ double result;
+
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->sval = buf; /* buf == record */
fldtab[0]->tval = REC | STR | DONTFREE;
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (is_number(fldtab[0]->sval, & result)) {
+ fldtab[0]->fval = result;
fldtab[0]->tval |= NUM;
}
}
@@ -184,47 +220,62 @@ void nextfile(void)
argno++;
}
-int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
{
- int sep, c;
+ int sep, c, isrec;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
char *rs = getsval(rsloc);
- if (strlen(getsval(fsloc)) >= sizeof (inputFS))
- FATAL("field separator %.10s... is too long", *FS);
- /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
- strcpy(inputFS, *FS); /* for subsequent field splitting */
- if ((sep = *rs) == 0) {
- sep = '\n';
- while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
- ;
- if (c != EOF)
- ungetc(c, inf);
- }
- for (rr = buf; ; ) {
- for (; (c=getc(inf)) != sep && c != EOF; ) {
- if (rr-buf+1 > bufsize)
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
- FATAL("input record `%.30s...' too long", buf);
+ if (*rs && rs[1]) {
+ bool found;
+
+ fa *pfa = makedfa(rs, 1);
+ if (newflag)
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ else {
+ int tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ pfa->initstat = tempstat;
+ }
+ if (found)
+ setptr(patbeg, '\0');
+ } else {
+ if ((sep = *rs) == 0) {
+ sep = '\n';
+ while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
+ ;
+ if (c != EOF)
+ ungetc(c, inf);
+ }
+ for (rr = buf; ; ) {
+ for (; (c=getc(inf)) != sep && c != EOF; ) {
+ if (rr-buf+1 > bufsize)
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf,
+ recsize, &rr, "readrec 1"))
+ FATAL("input record `%.30s...' too long", buf);
+ *rr++ = c;
+ }
+ if (*rs == sep || c == EOF)
+ break;
+ if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+ break;
+ if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
+ "readrec 2"))
+ FATAL("input record `%.30s...' too long", buf);
+ *rr++ = '\n';
*rr++ = c;
}
- if (*rs == sep || c == EOF)
- break;
- if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
- break;
- if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
FATAL("input record `%.30s...' too long", buf);
- *rr++ = '\n';
- *rr++ = c;
+ *rr = 0;
}
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
- FATAL("input record `%.30s...' too long", buf);
- *rr = 0;
- dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf;
*pbufsize = bufsize;
- return c == EOF && rr == buf ? 0 : 1;
+ isrec = *buf || !feof(inf);
+ DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
+ return isrec;
}
char *getargv(int n) /* get ARGV[n] */
@@ -233,12 +284,12 @@ char *getargv(int n) /* get ARGV[n] */
char *s, temp[50];
extern Array *ARGVtab;
- sprintf(temp, "%d", n);
+ snprintf(temp, sizeof(temp), "%d", n);
if (lookup(temp, ARGVtab) == NULL)
return NULL;
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
s = getsval(x);
- dprintf( ("getargv(%d) returns |%s|\n", n, s) );
+ DPRINTF("getargv(%d) returns |%s|\n", n, s);
return s;
}
@@ -246,6 +297,7 @@ void setclvar(char *s) /* set var=value from s */
{
char *p;
Cell *q;
+ double result;
for (p=s; *p != '='; p++)
;
@@ -253,11 +305,11 @@ void setclvar(char *s) /* set var=value from s */
p = qstring(p, '\0');
q = setsymtab(s, p, 0.0, STR, symtab);
setsval(q, p);
- if (is_number(q->sval)) {
- q->fval = atof(q->sval);
+ if (is_number(q->sval, & result)) {
+ q->fval = result;
q->tval |= NUM;
}
- dprintf( ("command line set %s to |%s|\n", s, p) );
+ DPRINTF("command line set %s to |%s|\n", s, p);
}
@@ -284,9 +336,8 @@ void fldbld(void) /* create fields from current record */
}
fr = fields;
i = 0; /* number of fields accumulated here */
- if (strlen(getsval(fsloc)) >= sizeof (inputFS))
- FATAL("field separator %.10s... is too long", *FS);
- strcpy(inputFS, *FS);
+ if (inputFS == NULL) /* make sure we have a copy of FS */
+ savefs();
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
@@ -309,15 +360,19 @@ void fldbld(void) /* create fields from current record */
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
- for (i = 0; *r != 0; r++) {
- char buf[2];
+ for (i = 0; *r != '\0'; r += n) {
+ char buf[MB_LEN_MAX + 1];
+
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
- buf[0] = *r;
- buf[1] = 0;
+ n = mblen(r, MB_LEN_MAX);
+ if (n < 0)
+ n = 1;
+ memcpy(buf, r, n);
+ buf[n] = '\0';
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}
@@ -350,16 +405,18 @@ void fldbld(void) /* create fields from current record */
FATAL("record `%.30s...' has too many fields; can't happen", r);
cleanfld(i+1, lastfld); /* clean out junk from previous record */
lastfld = i;
- donefld = 1;
+ donefld = true;
for (j = 1; j <= lastfld; j++) {
+ double result;
+
p = fldtab[j];
- if(is_number(p->sval)) {
- p->fval = atof(p->sval);
+ if(is_number(p->sval, & result)) {
+ p->fval = result;
p->tval |= NUM;
}
}
setfval(nfloc, (Awkfloat) lastfld);
- donerec = 1; /* restore */
+ donerec = true; /* restore */
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
@@ -377,7 +434,7 @@ void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
p = fldtab[i];
if (freeable(p))
xfree(p->sval);
- p->sval = "";
+ p->sval = EMPTY,
p->tval = FLD | STR | DONTFREE;
}
}
@@ -423,7 +480,7 @@ void growfldtab(int n) /* make new fields up to at least $n */
if (n > nf)
nf = n;
s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
- if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */
+ if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
fldtab = (Cell **) realloc(fldtab, s);
else /* overflow sizeof int */
xfree(fldtab); /* make it null */
@@ -453,7 +510,7 @@ int refldbld(const char *rec, const char *fs) /* build fields from reg expr in F
if (*rec == '\0')
return 0;
pfa = makedfa(fs, 1);
- dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
+ DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
tempstat = pfa->initstat;
for (i = 1; ; i++) {
if (i > nfields)
@@ -462,22 +519,22 @@ int refldbld(const char *rec, const char *fs) /* build fields from reg expr in F
xfree(fldtab[i]->sval);
fldtab[i]->tval = FLD | STR | DONTFREE;
fldtab[i]->sval = fr;
- dprintf( ("refldbld: i=%d\n", i) );
+ DPRINTF("refldbld: i=%d\n", i);
if (nematch(pfa, rec)) {
pfa->initstat = 2; /* horrible coupling to b.c */
- dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
+ DPRINTF("match %s (%d chars)\n", patbeg, patlen);
strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1;
*(fr-1) = '\0';
rec = patbeg + patlen;
} else {
- dprintf( ("no match %s\n", rec) );
+ DPRINTF("no match %s\n", rec);
strcpy(fr, rec);
pfa->initstat = tempstat;
break;
}
}
- return i;
+ return i;
}
void recbld(void) /* create $0 from $1..$NF if necessary */
@@ -486,7 +543,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
char *r, *p;
char *sep = getsval(ofsloc);
- if (donerec == 1)
+ if (donerec)
return;
r = record;
for (i = 1; i <= *NF; i++) {
@@ -505,16 +562,16 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
FATAL("built giant record `%.30s...'", record);
*r = '\0';
- dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) );
+ DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->tval = REC | STR | DONTFREE;
fldtab[0]->sval = record;
- dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) );
- dprintf( ("recbld = |%s|\n", record) );
- donerec = 1;
+ DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
+ DPRINTF("recbld = |%s|\n", record);
+ donerec = true;
}
int errorflag = 0;
@@ -539,18 +596,13 @@ void SYNTAX(const char *fmt, ...)
fprintf(stderr, " at source line %d", lineno);
if (curfname != NULL)
fprintf(stderr, " in function %s", curfname);
- if (compile_time == 1 && cursource() != NULL)
+ if (compile_time == COMPILING && cursource() != NULL)
fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
errorflag = 2;
eprint();
}
-void fpecatch(int n)
-{
- FATAL("floating point exception %d", n);
-}
-
extern int bracecnt, brackcnt, parencnt;
void bracecheck(void)
@@ -613,20 +665,22 @@ void error()
extern Node *curnode;
fprintf(stderr, "\n");
- if (compile_time != 2 && NR && *NR > 0) {
- fprintf(stderr, " input record number %d", (int) (*FNR));
- if (strcmp(*FILENAME, "-") != 0)
- fprintf(stderr, ", file %s", *FILENAME);
+ if (compile_time != ERROR_PRINTING) {
+ if (NR && *NR > 0) {
+ fprintf(stderr, " input record number %d", (int) (*FNR));
+ if (strcmp(*FILENAME, "-") != 0)
+ fprintf(stderr, ", file %s", *FILENAME);
+ fprintf(stderr, "\n");
+ }
+ if (curnode)
+ fprintf(stderr, " source line number %d", curnode->lineno);
+ else if (lineno)
+ fprintf(stderr, " source line number %d", lineno);
+ if (compile_time == COMPILING && cursource() != NULL)
+ fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
+ eprint();
}
- if (compile_time != 2 && curnode)
- fprintf(stderr, " source line number %d", curnode->lineno);
- else if (compile_time != 2 && lineno)
- fprintf(stderr, " source line number %d", lineno);
- if (compile_time == 1 && cursource() != NULL)
- fprintf(stderr, " source file %s", cursource());
- fprintf(stderr, "\n");
- eprint();
}
void eprint(void) /* try to print context around error */
@@ -636,7 +690,7 @@ void eprint(void) /* try to print context around error */
static int been_here = 0;
extern char ebuf[], *ep;
- if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
+ if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
return;
if (ebuf == ep)
return;
@@ -710,19 +764,75 @@ int isclvar(const char *s) /* is s of form var=something ? */
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
/* wrong: violates 4.10.1.4 of ansi C standard */
-#include <math.h>
-int is_number(const char *s)
+/* well, not quite. As of C99, hex floating point is allowed. so this is
+ * a bit of a mess. We work around the mess by checking for a hexadecimal
+ * value and disallowing it. Similarly, we now follow gawk and allow only
+ * +nan, -nan, +inf, and -inf for NaN and infinity values.
+ */
+
+/*
+ * This routine now has a more complicated interface, the main point
+ * being to avoid the double conversion of a string to double, and
+ * also to convey out, if requested, the information that the numeric
+ * value was a leading string or is all of the string. The latter bit
+ * is used in getfval().
+ */
+
+bool is_valid_number(const char *s, bool trailing_stuff_ok,
+ bool *no_trailing, double *result)
{
double r;
char *ep;
+ bool retval = false;
+ bool is_nan = false;
+ bool is_inf = false;
+
+ if (no_trailing)
+ *no_trailing = false;
+
+ while (isspace(*s))
+ s++;
+
+ // no hex floating point, sorry
+ if (s[0] == '0' && tolower(s[1]) == 'x')
+ return false;
+
+ // allow +nan, -nan, +inf, -inf, any other letter, no
+ if (s[0] == '+' || s[0] == '-') {
+ is_nan = (strncasecmp(s+1, "nan", 3) == 0);
+ is_inf = (strncasecmp(s+1, "inf", 3) == 0);
+ if ((is_nan || is_inf)
+ && (isspace(s[4]) || s[4] == '\0'))
+ goto convert;
+ else if (! isdigit(s[1]) && s[1] != '.')
+ return false;
+ }
+ else if (! isdigit(s[0]) && s[0] != '.')
+ return false;
+
+convert:
errno = 0;
r = strtod(s, &ep);
- if (ep == s || r == HUGE_VAL || errno == ERANGE)
- return 0;
- while (*ep == ' ' || *ep == '\t' || *ep == '\n')
+ if (ep == s || errno == ERANGE)
+ return false;
+
+ if (isnan(r) && s[0] == '-' && signbit(r) == 0)
+ r = -r;
+
+ if (result != NULL)
+ *result = r;
+
+ /*
+ * check for trailing stuff
+ */
+ while (isspace(*ep))
ep++;
- if (*ep == '\0')
- return 1;
- else
- return 0;
+
+ if (no_trailing != NULL)
+ *no_trailing = (*ep == '\0');
+
+ // return true if found the end, or trailing stuff is allowed
+ retval = *ep == '\0' || trailing_stuff_ok;
+
+ return retval;
}
diff --git a/main.c b/main.c
index 98661fcd7829..f3936341ae75 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20190529";
+const char *version = "version 20210215";
#define DEBUG
#include <stdio.h>
@@ -32,7 +32,6 @@ const char *version = "version 20190529";
#include <string.h>
#include <signal.h>
#include "awk.h"
-#include "ytab.h"
extern char **environ;
extern int nfields;
@@ -43,16 +42,41 @@ char *cmdname; /* gets argv[0] for error messages */
extern FILE *yyin; /* lex input file */
char *lexprog; /* points to program argument if it exists */
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
-int compile_time = 2; /* for error printing: */
- /* 2 = cmdline, 1 = compile, 0 = running */
+enum compile_states compile_time = ERROR_PRINTING;
-#define MAX_PFILE 20 /* max number of -f's */
+static char **pfile; /* program filenames from -f's */
+static size_t maxpfile; /* max program filename */
+static size_t npfile; /* number of filenames */
+static size_t curpfile; /* current filename */
-char *pfile[MAX_PFILE]; /* program filenames from -f's */
-int npfile = 0; /* number of filenames */
-int curpfile = 0; /* current filename */
+bool safe = false; /* true => "safe" mode */
-int safe = 0; /* 1 => "safe" mode */
+static noreturn void fpecatch(int n
+#ifdef SA_SIGINFO
+ , siginfo_t *si, void *uc
+#endif
+)
+{
+#ifdef SA_SIGINFO
+ static const char *emsg[] = {
+ [0] = "Unknown error",
+ [FPE_INTDIV] = "Integer divide by zero",
+ [FPE_INTOVF] = "Integer overflow",
+ [FPE_FLTDIV] = "Floating point divide by zero",
+ [FPE_FLTOVF] = "Floating point overflow",
+ [FPE_FLTUND] = "Floating point underflow",
+ [FPE_FLTRES] = "Floating point inexact result",
+ [FPE_FLTINV] = "Invalid Floating point operation",
+ [FPE_FLTSUB] = "Subscript out of range",
+ };
+#endif
+ FATAL("floating point exception"
+#ifdef SA_SIGINFO
+ ": %s", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) &&
+ emsg[si->si_code] ? emsg[si->si_code] : emsg[0]
+#endif
+ );
+}
/* Can this work with recursive calls? I don't think so.
void segvcatch(int n)
@@ -61,32 +85,67 @@ void segvcatch(int n)
}
*/
+static const char *
+setfs(char *p)
+{
+ /* wart: t=>\t */
+ if (p[0] == 't' && p[1] == '\0')
+ return "\t";
+ else if (p[0] != '\0')
+ return p;
+ return NULL;
+}
+
+static char *
+getarg(int *argc, char ***argv, const char *msg)
+{
+ if ((*argv)[1][2] != '\0') { /* arg is -fsomething */
+ return &(*argv)[1][2];
+ } else { /* arg is -f something */
+ (*argc)--; (*argv)++;
+ if (*argc <= 1)
+ FATAL("%s", msg);
+ return (*argv)[1];
+ }
+}
+
int main(int argc, char *argv[])
{
const char *fs = NULL;
+ char *fn, *vn;
setlocale(LC_CTYPE, "");
setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
cmdname = argv[0];
if (argc == 1) {
- fprintf(stderr,
- "usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
+ fprintf(stderr,
+ "usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
cmdname);
exit(1);
}
- signal(SIGFPE, fpecatch);
+#ifdef SA_SIGINFO
+ {
+ struct sigaction sa;
+ sa.sa_sigaction = fpecatch;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ (void)sigaction(SIGFPE, &sa, NULL);
+ }
+#else
+ (void)signal(SIGFPE, fpecatch);
+#endif
/*signal(SIGSEGV, segvcatch); experiment */
+ /* Set and keep track of the random seed */
srand_seed = 1;
- srand(srand_seed);
+ srandom((unsigned long) srand_seed);
yyin = NULL;
symtab = makesymtab(NSYMTAB/NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
- if (strcmp(argv[1],"-version") == 0 || strcmp(argv[1],"--version") == 0) {
+ if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0) {
printf("awk %s\n", version);
- exit(0);
- break;
+ return 0;
}
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
argc--;
@@ -96,53 +155,29 @@ int main(int argc, char *argv[])
switch (argv[1][1]) {
case 's':
if (strcmp(argv[1], "-safe") == 0)
- safe = 1;
+ safe = true;
break;
case 'f': /* next argument is program filename */
- if (argv[1][2] != 0) { /* arg is -fsomething */
- if (npfile >= MAX_PFILE - 1)
- FATAL("too many -f options");
- pfile[npfile++] = &argv[1][2];
- } else { /* arg is -f something */
- argc--; argv++;
- if (argc <= 1)
- FATAL("no program filename");
- if (npfile >= MAX_PFILE - 1)
- FATAL("too many -f options");
- pfile[npfile++] = argv[1];
- }
- break;
+ fn = getarg(&argc, &argv, "no program filename");
+ if (npfile >= maxpfile) {
+ maxpfile += 20;
+ pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile));
+ if (pfile == NULL)
+ FATAL("error allocating space for -f options");
+ }
+ pfile[npfile++] = fn;
+ break;
case 'F': /* set field separator */
- if (argv[1][2] != 0) { /* arg is -Fsomething */
- if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */
- fs = "\t";
- else if (argv[1][2] != 0)
- fs = &argv[1][2];
- } else { /* arg is -F something */
- argc--; argv++;
- if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */
- fs = "\t";
- else if (argc > 1 && argv[1][0] != 0)
- fs = &argv[1][0];
- }
- if (fs == NULL || *fs == '\0')
+ fs = setfs(getarg(&argc, &argv, "no field separator"));
+ if (fs == NULL)
WARNING("field separator FS is empty");
break;
case 'v': /* -v a=1 to be done NOW. one -v for each */
- if (argv[1][2] != 0) { /* arg is -vsomething */
- if (isclvar(&argv[1][2]))
- setclvar(&argv[1][2]);
- else
- FATAL("invalid -v option argument: %s", &argv[1][2]);
- } else { /* arg is -v something */
- argc--; argv++;
- if (argc <= 1)
- FATAL("no variable name");
- if (isclvar(argv[1]))
- setclvar(argv[1]);
- else
- FATAL("invalid -v option argument: %s", argv[1]);
- }
+ vn = getarg(&argc, &argv, "no variable name");
+ if (isclvar(vn))
+ setclvar(vn);
+ else
+ FATAL("invalid -v option argument: %s", vn);
break;
case 'd':
dbg = atoi(&argv[1][2]);
@@ -164,26 +199,30 @@ int main(int argc, char *argv[])
exit(0);
FATAL("no program given");
}
- dprintf( ("program = |%s|\n", argv[1]) );
+ DPRINTF("program = |%s|\n", argv[1]);
lexprog = argv[1];
argc--;
argv++;
}
recinit(recsize);
syminit();
- compile_time = 1;
+ compile_time = COMPILING;
argv[0] = cmdname; /* put prog name at front of arglist */
- dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
+ DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
arginit(argc, argv);
if (!safe)
envinit(environ);
yyparse();
+#if 0
+ // Doing this would comply with POSIX, but is not compatible with
+ // other awks and with what most users expect. So comment it out.
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
+#endif
if (fs)
*FS = qstring(fs, '\0');
- dprintf( ("errorflag=%d\n", errorflag) );
+ DPRINTF("errorflag=%d\n", errorflag);
if (errorflag == 0) {
- compile_time = 0;
+ compile_time = RUNNING;
run(winner);
} else
bracecheck();
@@ -216,7 +255,7 @@ int pgetc(void) /* get 1 character from awk program */
char *cursource(void) /* current source file name */
{
if (npfile > 0)
- return pfile[curpfile];
+ return pfile[curpfile < npfile ? curpfile : curpfile - 1];
else
return NULL;
}
diff --git a/makefile b/makefile
index 3c0b62e1df9e..9ceaaad48af1 100644
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
# /****************************************************************
# Copyright (C) Lucent Technologies 1997
# All Rights Reserved
-#
+#
# Permission to use, copy, modify, and distribute this software and
# its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
@@ -11,7 +11,7 @@
# its entities not be used in advertising or publicity pertaining
# to distribution of the software without specific, written prior
# permission.
-#
+#
# LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
# IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
@@ -22,6 +22,7 @@
# THIS SOFTWARE.
# ****************************************************************/
+CFLAGS = -fsanitize=address -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
CFLAGS = -g
CFLAGS =
CFLAGS = -O2
@@ -30,57 +31,46 @@ CFLAGS = -O2
#CC = gcc -Wall -g -Wwrite-strings
#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
-HOSTCC = gcc -g -Wall -pedantic
+HOSTCC = gcc -g -Wall -pedantic -Wcast-qual
CC = $(HOSTCC) # change this is cross-compiling.
-# yacc options. pick one; this varies a lot by system.
-#YFLAGS = -d -S
-YACC = bison -d -y
-#YACC = yacc -d
-# -S uses sprintf in yacc parser instead of sprint
+# By fiat, to make our lives easier, yacc is now defined to be bison.
+# If you want something else, you're on your own.
+YACC = bison -d
OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
-SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
- maketab.c parse.c lib.c run.c tran.c proctab.c
+SOURCE = awk.h awkgram.tab.c awkgram.tab.h proto.h awkgram.y lex.c b.c main.c \
+ maketab.c parse.c lib.c run.c tran.c proctab.c
LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
- lib.c run.c tran.c
+ lib.c run.c tran.c
-SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \
+SHIP = README LICENSE FIXES $(SOURCE) awkgram.tab.[ch].bak makefile \
awk.1
-a.out: ytab.o $(OFILES)
- $(CC) $(CFLAGS) ytab.o $(OFILES) $(ALLOC) -lm
+a.out: awkgram.tab.o $(OFILES)
+ $(CC) $(CFLAGS) awkgram.tab.o $(OFILES) $(ALLOC) -lm
-$(OFILES): awk.h ytab.h proto.h
+$(OFILES): awk.h awkgram.tab.h proto.h
-#Clear dependency for parallel build: (make -j)
-#YACC generated y.tab.c and y.tab.h at the same time
-#this needs to be a static pattern rules otherwise multiple target
-#are mapped onto multiple executions of yacc, which overwrite
-#each others outputs.
-y%.c y%.h: awk.h proto.h awkgram.y
+awkgram.tab.c awkgram.tab.h: awk.h proto.h awkgram.y
$(YACC) $(YFLAGS) awkgram.y
- mv y.$*.c y$*.c
- mv y.$*.h y$*.h
-
-ytab.h: ytab.c
proctab.c: maketab
- ./maketab ytab.h >proctab.c
+ ./maketab awkgram.tab.h >proctab.c
-maketab: ytab.h maketab.c
+maketab: awkgram.tab.h maketab.c
$(HOSTCC) $(CFLAGS) maketab.c -o maketab
bundle:
- @cp ytab.h ytabh.bak
- @cp ytab.c ytabc.bak
+ @cp awkgram.tab.h awkgram.tab.h.bak
+ @cp awkgram.tab.c awkgram.tab.c.bak
@bundle $(SHIP)
tar:
- @cp ytab.h ytabh.bak
- @cp ytab.c ytabc.bak
+ @cp awkgram.tab.h awkgram.tab.h.bak
+ @cp awkgram.tab.c awkgram.tab.c.bak
@bundle $(SHIP) >awk.shar
@tar cf awk.tar $(SHIP)
gzip awk.tar
@@ -92,18 +82,30 @@ gitadd:
git add README LICENSE FIXES \
awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
lib.c run.c tran.c \
- makefile awk.1 awktest.tar
+ makefile awk.1 testdir
gitpush:
- # only do this once:
+ # only do this once:
# git remote add origin https://github.com/onetrueawk/awk.git
git push -u origin master
names:
@echo $(LISTING)
-clean:
+test check:
+ ./REGRESS
+
+clean: testclean
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
-cleaner:
- rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab*
+cleaner: testclean
+ rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c awkgram.tab.*
+
+# This is a bit of a band-aid until we can invest some more time
+# in the test suite.
+testclean:
+ cd testdir; rm -fr arnold-fixes beebe devnull echo foo* \
+ glop glop1 glop2 lilly.diff tempbig tempsmall time
+
+# For the habits of GNU maintainers:
+distclean: cleaner
diff --git a/maketab.c b/maketab.c
index dbe3d241fcc8..d4b756ad6706 100644
--- a/maketab.c
+++ b/maketab.c
@@ -25,14 +25,14 @@ THIS SOFTWARE.
/*
* this program makes the table to link function names
* and type indices that is used by execute() in run.c.
- * it finds the indices in ytab.h, produced by yacc.
+ * it finds the indices in awkgram.tab.h, produced by bison.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
struct xx
{ int token;
@@ -104,6 +104,7 @@ struct xx
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" },
+ { GENSUB, "gensub", "gensub" },
{ 0, "", "" },
};
@@ -118,12 +119,11 @@ int main(int argc, char *argv[])
char c;
FILE *fp;
char buf[200], name[200], def[200];
+ enum { TOK_UNKNOWN, TOK_ENUM, TOK_DEFINE } tokentype = TOK_UNKNOWN;
printf("#include <stdio.h>\n");
printf("#include \"awk.h\"\n");
- printf("#include \"ytab.h\"\n\n");
- for (i = SIZE; --i >= 0; )
- names[i] = "";
+ printf("#include \"awkgram.tab.h\"\n\n");
if (argc != 2) {
fprintf(stderr, "usage: maketab YTAB_H\n");
@@ -133,21 +133,41 @@ int main(int argc, char *argv[])
fprintf(stderr, "maketab can't open %s!\n", argv[1]);
exit(1);
}
- printf("static char *printname[%d] = {\n", SIZE);
+ printf("static const char * const printname[%d] = {\n", SIZE);
i = 0;
while (fgets(buf, sizeof buf, fp) != NULL) {
- n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
- if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
- continue;
- if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0)
+ // 199 is sizeof(def) - 1
+ if (tokentype != TOK_ENUM) {
+ n = sscanf(buf, "%1c %199s %199s %d", &c, def, name,
+ &tok);
+ if (n == 4 && c == '#' && strcmp(def, "define") == 0) {
+ tokentype = TOK_DEFINE;
+ } else if (tokentype != TOK_UNKNOWN) {
+ continue;
+ }
+ }
+ if (tokentype != TOK_DEFINE) {
+ /* not a valid #define, bison uses enums now */
+ n = sscanf(buf, "%199s = %d,\n", name, &tok);
+ if (n != 2)
+ continue;
+ tokentype = TOK_ENUM;
+ }
+ if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0) {
+ tokentype = TOK_UNKNOWN;
continue;
+ }
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
+ tokentype = TOK_UNKNOWN;
/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
continue;
}
- names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
- strcpy(names[tok-FIRSTTOKEN], name);
- printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
+ names[tok-FIRSTTOKEN] = strdup(name);
+ if (names[tok-FIRSTTOKEN] == NULL) {
+ fprintf(stderr, "maketab out of space copying %s", name);
+ continue;
+ }
+ printf("\t\"%s\",\t/* %d */\n", name, tok);
i++;
}
printf("};\n\n");
@@ -156,20 +176,18 @@ int main(int argc, char *argv[])
table[p->token-FIRSTTOKEN] = p->name;
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++)
- if (table[i]==0)
- printf("\tnullproc,\t/* %s */\n", names[i]);
- else
- printf("\t%s,\t/* %s */\n", table[i], names[i]);
+ printf("\t%s,\t/* %s */\n",
+ table[i] ? table[i] : "nullproc", names[i] ? names[i] : "");
printf("};\n\n");
- printf("char *tokname(int n)\n"); /* print a tokname() function */
+ printf("const char *tokname(int n)\n"); /* print a tokname() function */
printf("{\n");
- printf(" static char buf[100];\n\n");
- printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
- printf(" sprintf(buf, \"token %%d\", n);\n");
- printf(" return buf;\n");
- printf(" }\n");
- printf(" return printname[n-FIRSTTOKEN];\n");
+ printf("\tstatic char buf[100];\n\n");
+ printf("\tif (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
+ printf("\t\tsnprintf(buf, sizeof(buf), \"token %%d\", n);\n");
+ printf("\t\treturn buf;\n");
+ printf("\t}\n");
+ printf("\treturn printname[n-FIRSTTOKEN];\n");
printf("}\n");
return 0;
}
diff --git a/parse.c b/parse.c
index 8304ded837ba..79b8fade83b5 100644
--- a/parse.c
+++ b/parse.c
@@ -27,13 +27,13 @@ THIS SOFTWARE.
#include <string.h>
#include <stdlib.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
Node *nodealloc(int n)
{
Node *x;
- x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
+ x = (Node *) malloc(sizeof(*x) + (n-1) * sizeof(x));
if (x == NULL)
FATAL("out of space in nodealloc");
x->nnext = NULL;
@@ -93,6 +93,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
+{
+ Node *x;
+
+ x = nodealloc(5);
+ x->nobj = a;
+ x->narg[0] = b;
+ x->narg[1] = c;
+ x->narg[2] = d;
+ x->narg[3] = e;
+ x->narg[4] = f;
+ return(x);
+}
+
Node *stat1(int a, Node *b)
{
Node *x;
@@ -165,6 +179,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
+{
+ Node *x;
+
+ x = node5(a,b,c,d,e,f);
+ x->ntype = NEXPR;
+ return(x);
+}
+
Node *celltonode(Cell *a, int b)
{
Node *x;
@@ -250,7 +273,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */
for (p = vl; p; p = p->nnext)
n++;
v->fval = n;
- dprintf( ("defining func %s (%d args)\n", v->nval, n) );
+ DPRINTF("defining func %s (%d args)\n", v->nval, n);
}
int isarg(const char *s) /* is s in argument list for current function? */
@@ -259,7 +282,7 @@ int isarg(const char *s) /* is s in argument list for current function? */
Node *p = arglist;
int n;
- for (n = 0; p != 0; p = p->nnext, n++)
+ for (n = 0; p != NULL; p = p->nnext, n++)
if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
return n;
return -1;
diff --git a/proctab.c b/proctab.c
index ff212c416c3a..96fad36e601c 100644
--- a/proctab.c
+++ b/proctab.c
@@ -1,106 +1,108 @@
#include <stdio.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
-static char *printname[94] = {
- (char *) "FIRSTTOKEN", /* 258 */
- (char *) "PROGRAM", /* 259 */
- (char *) "PASTAT", /* 260 */
- (char *) "PASTAT2", /* 261 */
- (char *) "XBEGIN", /* 262 */
- (char *) "XEND", /* 263 */
- (char *) "NL", /* 264 */
- (char *) "ARRAY", /* 265 */
- (char *) "MATCH", /* 266 */
- (char *) "NOTMATCH", /* 267 */
- (char *) "MATCHOP", /* 268 */
- (char *) "FINAL", /* 269 */
- (char *) "DOT", /* 270 */
- (char *) "ALL", /* 271 */
- (char *) "CCL", /* 272 */
- (char *) "NCCL", /* 273 */
- (char *) "CHAR", /* 274 */
- (char *) "OR", /* 275 */
- (char *) "STAR", /* 276 */
- (char *) "QUEST", /* 277 */
- (char *) "PLUS", /* 278 */
- (char *) "EMPTYRE", /* 279 */
- (char *) "AND", /* 280 */
- (char *) "BOR", /* 281 */
- (char *) "APPEND", /* 282 */
- (char *) "EQ", /* 283 */
- (char *) "GE", /* 284 */
- (char *) "GT", /* 285 */
- (char *) "LE", /* 286 */
- (char *) "LT", /* 287 */
- (char *) "NE", /* 288 */
- (char *) "IN", /* 289 */
- (char *) "ARG", /* 290 */
- (char *) "BLTIN", /* 291 */
- (char *) "BREAK", /* 292 */
- (char *) "CLOSE", /* 293 */
- (char *) "CONTINUE", /* 294 */
- (char *) "DELETE", /* 295 */
- (char *) "DO", /* 296 */
- (char *) "EXIT", /* 297 */
- (char *) "FOR", /* 298 */
- (char *) "FUNC", /* 299 */
- (char *) "SUB", /* 300 */
- (char *) "GSUB", /* 301 */
- (char *) "IF", /* 302 */
- (char *) "INDEX", /* 303 */
- (char *) "LSUBSTR", /* 304 */
- (char *) "MATCHFCN", /* 305 */
- (char *) "NEXT", /* 306 */
- (char *) "NEXTFILE", /* 307 */
- (char *) "ADD", /* 308 */
- (char *) "MINUS", /* 309 */
- (char *) "MULT", /* 310 */
- (char *) "DIVIDE", /* 311 */
- (char *) "MOD", /* 312 */
- (char *) "ASSIGN", /* 313 */
- (char *) "ASGNOP", /* 314 */
- (char *) "ADDEQ", /* 315 */
- (char *) "SUBEQ", /* 316 */
- (char *) "MULTEQ", /* 317 */
- (char *) "DIVEQ", /* 318 */
- (char *) "MODEQ", /* 319 */
- (char *) "POWEQ", /* 320 */
- (char *) "PRINT", /* 321 */
- (char *) "PRINTF", /* 322 */
- (char *) "SPRINTF", /* 323 */
- (char *) "ELSE", /* 324 */
- (char *) "INTEST", /* 325 */
- (char *) "CONDEXPR", /* 326 */
- (char *) "POSTINCR", /* 327 */
- (char *) "PREINCR", /* 328 */
- (char *) "POSTDECR", /* 329 */
- (char *) "PREDECR", /* 330 */
- (char *) "VAR", /* 331 */
- (char *) "IVAR", /* 332 */
- (char *) "VARNF", /* 333 */
- (char *) "CALL", /* 334 */
- (char *) "NUMBER", /* 335 */
- (char *) "STRING", /* 336 */
- (char *) "REGEXPR", /* 337 */
- (char *) "GETLINE", /* 338 */
- (char *) "RETURN", /* 339 */
- (char *) "SPLIT", /* 340 */
- (char *) "SUBSTR", /* 341 */
- (char *) "WHILE", /* 342 */
- (char *) "CAT", /* 343 */
- (char *) "NOT", /* 344 */
- (char *) "UMINUS", /* 345 */
- (char *) "UPLUS", /* 346 */
- (char *) "POWER", /* 347 */
- (char *) "DECR", /* 348 */
- (char *) "INCR", /* 349 */
- (char *) "INDIRECT", /* 350 */
- (char *) "LASTTOKEN", /* 351 */
+static const char * const printname[96] = {
+ "FIRSTTOKEN", /* 258 */
+ "PROGRAM", /* 259 */
+ "PASTAT", /* 260 */
+ "PASTAT2", /* 261 */
+ "XBEGIN", /* 262 */
+ "XEND", /* 263 */
+ "NL", /* 264 */
+ "ARRAY", /* 265 */
+ "MATCH", /* 266 */
+ "NOTMATCH", /* 267 */
+ "MATCHOP", /* 268 */
+ "FINAL", /* 269 */
+ "DOT", /* 270 */
+ "ALL", /* 271 */
+ "CCL", /* 272 */
+ "NCCL", /* 273 */
+ "CHAR", /* 274 */
+ "OR", /* 275 */
+ "STAR", /* 276 */
+ "QUEST", /* 277 */
+ "PLUS", /* 278 */
+ "EMPTYRE", /* 279 */
+ "ZERO", /* 280 */
+ "AND", /* 281 */
+ "BOR", /* 282 */
+ "APPEND", /* 283 */
+ "EQ", /* 284 */
+ "GE", /* 285 */
+ "GT", /* 286 */
+ "LE", /* 287 */
+ "LT", /* 288 */
+ "NE", /* 289 */
+ "IN", /* 290 */
+ "ARG", /* 291 */
+ "BLTIN", /* 292 */
+ "BREAK", /* 293 */
+ "CLOSE", /* 294 */
+ "CONTINUE", /* 295 */
+ "DELETE", /* 296 */
+ "DO", /* 297 */
+ "EXIT", /* 298 */
+ "FOR", /* 299 */
+ "FUNC", /* 300 */
+ "GENSUB", /* 301 */
+ "SUB", /* 302 */
+ "GSUB", /* 303 */
+ "IF", /* 304 */
+ "INDEX", /* 305 */
+ "LSUBSTR", /* 306 */
+ "MATCHFCN", /* 307 */
+ "NEXT", /* 308 */
+ "NEXTFILE", /* 309 */
+ "ADD", /* 310 */
+ "MINUS", /* 311 */
+ "MULT", /* 312 */
+ "DIVIDE", /* 313 */
+ "MOD", /* 314 */
+ "ASSIGN", /* 315 */
+ "ASGNOP", /* 316 */
+ "ADDEQ", /* 317 */
+ "SUBEQ", /* 318 */
+ "MULTEQ", /* 319 */
+ "DIVEQ", /* 320 */
+ "MODEQ", /* 321 */
+ "POWEQ", /* 322 */
+ "PRINT", /* 323 */
+ "PRINTF", /* 324 */
+ "SPRINTF", /* 325 */
+ "ELSE", /* 326 */
+ "INTEST", /* 327 */
+ "CONDEXPR", /* 328 */
+ "POSTINCR", /* 329 */
+ "PREINCR", /* 330 */
+ "POSTDECR", /* 331 */
+ "PREDECR", /* 332 */
+ "VAR", /* 333 */
+ "IVAR", /* 334 */
+ "VARNF", /* 335 */
+ "CALL", /* 336 */
+ "NUMBER", /* 337 */
+ "STRING", /* 338 */
+ "REGEXPR", /* 339 */
+ "GETLINE", /* 340 */
+ "RETURN", /* 341 */
+ "SPLIT", /* 342 */
+ "SUBSTR", /* 343 */
+ "WHILE", /* 344 */
+ "CAT", /* 345 */
+ "NOT", /* 346 */
+ "UMINUS", /* 347 */
+ "UPLUS", /* 348 */
+ "POWER", /* 349 */
+ "DECR", /* 350 */
+ "INCR", /* 351 */
+ "INDIRECT", /* 352 */
+ "LASTTOKEN", /* 353 */
};
-Cell *(*proctab[94])(Node **, int) = {
+Cell *(*proctab[96])(Node **, int) = {
nullproc, /* FIRSTTOKEN */
program, /* PROGRAM */
pastat, /* PASTAT */
@@ -123,6 +125,7 @@ Cell *(*proctab[94])(Node **, int) = {
nullproc, /* QUEST */
nullproc, /* PLUS */
nullproc, /* EMPTYRE */
+ nullproc, /* ZERO */
boolop, /* AND */
boolop, /* BOR */
nullproc, /* APPEND */
@@ -143,6 +146,7 @@ Cell *(*proctab[94])(Node **, int) = {
jump, /* EXIT */
forstat, /* FOR */
nullproc, /* FUNC */
+ gensub, /* GENSUB */
sub, /* SUB */
gsub, /* GSUB */
ifstat, /* IF */
@@ -197,12 +201,12 @@ Cell *(*proctab[94])(Node **, int) = {
nullproc, /* LASTTOKEN */
};
-char *tokname(int n)
+const char *tokname(int n)
{
static char buf[100];
if (n < FIRSTTOKEN || n > LASTTOKEN) {
- sprintf(buf, "token %d", n);
+ snprintf(buf, sizeof(buf), "token %d", n);
return buf;
}
return printname[n-FIRSTTOKEN];
diff --git a/proto.h b/proto.h
index ad6f2e80a594..5cd3afb8cf85 100644
--- a/proto.h
+++ b/proto.h
@@ -38,15 +38,15 @@ extern int yylook(void);
extern int yyback(int *, int);
extern int yyinput(void);
-extern fa *makedfa(const char *, int);
-extern fa *mkdfa(const char *, int);
-extern int makeinit(fa *, int);
+extern fa *makedfa(const char *, bool);
+extern fa *mkdfa(const char *, bool);
+extern int makeinit(fa *, bool);
extern void penter(Node *);
extern void freetr(Node *);
-extern int hexstr(uschar **);
-extern int quoted(uschar **);
+extern int hexstr(const uschar **);
+extern int quoted(const uschar **);
extern char *cclenter(const char *);
-extern void overflo(const char *) __attribute__((__noreturn__));
+extern noreturn void overflo(const char *);
extern void cfoll(fa *, Node *);
extern int first(Node *);
extern void follow(Node *);
@@ -54,6 +54,7 @@ extern int member(int, const char *);
extern int match(fa *, const char *);
extern int pmatch(fa *, const char *);
extern int nematch(fa *, const char *);
+extern bool fnematch(fa *, FILE *, char **, int *, int);
extern Node *reparse(const char *);
extern Node *regexp(void);
extern Node *primary(void);
@@ -73,12 +74,14 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *);
+extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *);
extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
+extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int);
@@ -88,7 +91,7 @@ extern Node *pa2stat(Node *, Node *, Node *);
extern Node *linkum(Node *, Node *);
extern void defn(Cell *, Node *, Node *);
extern int isarg(const char *);
-extern char *tokname(int);
+extern const char *tokname(int);
extern Cell *(*proctab[])(Node **, int);
extern int ptoi(void *);
extern Node *itonp(int);
@@ -110,15 +113,18 @@ extern double getfval(Cell *);
extern char *getsval(Cell *);
extern char *getpssval(Cell *); /* for print */
extern char *tostring(const char *);
+extern char *tostringN(const char *, size_t);
extern char *qstring(const char *, int);
+extern Cell *catstr(Cell *, Cell *);
extern void recinit(unsigned int);
extern void initgetrec(void);
extern void makefields(int, int);
extern void growfldtab(int n);
-extern int getrec(char **, int *, int);
+extern void savefs(void);
+extern int getrec(char **, int *, bool);
extern void nextfile(void);
-extern int readrec(char **buf, int *bufsize, FILE *inf);
+extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@@ -129,18 +135,22 @@ extern int refldbld(const char *, const char *);
extern void recbld(void);
extern Cell *fieldadr(int);
extern void yyerror(const char *);
-extern void fpecatch(int);
extern void bracecheck(void);
extern void bcheck2(int, int, int);
-extern void SYNTAX(const char *, ...);
-extern void FATAL(const char *, ...) __attribute__((__noreturn__));
-extern void WARNING(const char *, ...);
+extern void SYNTAX(const char *, ...)
+ __attribute__((__format__(__printf__, 1, 2)));
+extern noreturn void FATAL(const char *, ...)
+ __attribute__((__format__(__printf__, 1, 2)));
+extern void WARNING(const char *, ...)
+ __attribute__((__format__(__printf__, 1, 2)));
extern void error(void);
extern void eprint(void);
extern void bclass(int);
extern double errcheck(double, const char *);
extern int isclvar(const char *);
-extern int is_number(const char *);
+extern bool is_valid_number(const char *s, bool trailing_stuff_ok,
+ bool *no_trailing, double *result);
+#define is_number(s, val) is_valid_number(s, false, NULL, val)
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
extern void run(Node *);
@@ -185,12 +195,13 @@ extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
-extern FILE *openfile(int, const char *);
+extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
+extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
diff --git a/run.c b/run.c
index 2dfb3e6c383d..117fb0748bd8 100644
--- a/run.c
+++ b/run.c
@@ -25,6 +25,10 @@ THIS SOFTWARE.
#define DEBUG
#include <stdio.h>
#include <ctype.h>
+#include <errno.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <fcntl.h>
#include <setjmp.h>
#include <limits.h>
#include <math.h>
@@ -34,13 +38,14 @@ THIS SOFTWARE.
#include <sys/types.h>
#include <sys/wait.h>
#include "awk.h"
-#include "ytab.h"
+#include "awkgram.tab.h"
-#define tempfree(x) if (istemp(x)) tfree(x); else
-
-/*
-#undef tempfree
+static void stdinit(void);
+static void flush_all(void);
+#if 1
+#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
+#else
void tempfree(Cell *p) {
if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
WARNING("bad csub %d in Cell %d %s",
@@ -49,7 +54,7 @@ void tempfree(Cell *p) {
if (istemp(p))
tfree(p);
}
-*/
+#endif
/* do we really need these? */
/* #ifdef _NFILE */
@@ -73,23 +78,23 @@ extern Awkfloat srand_seed;
Node *winner = NULL; /* root of parse tree */
Cell *tmps; /* free temporary cells for execution */
-static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
+static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell *True = &truecell;
-static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
+static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell *False = &falsecell;
-static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
+static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jbreak = &breakcell;
-static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
+static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jcont = &contcell;
-static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
+static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jnext = &nextcell;
-static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL };
+static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jnextfile = &nextfilecell;
-static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
+static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jexit = &exitcell;
-static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
+static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell *jret = &retcell;
-static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
+static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
Node *curnode = NULL; /* the node being executed, for debugging */
@@ -114,7 +119,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
if (rminlen)
minlen += quantum - rminlen;
tbuf = (char *) realloc(*pbuf, minlen);
- dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
+ DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf);
if (tbuf == NULL) {
if (whatrtn)
FATAL("out of memory in %s", whatrtn);
@@ -130,7 +135,6 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
void run(Node *a) /* execution of parse tree starts here */
{
- extern void stdinit(void);
stdinit();
execute(a);
@@ -189,7 +193,7 @@ Cell *program(Node **a, int n) /* execute an awk program */
tempfree(x);
}
if (a[1] || a[2])
- while (getrec(&record, &recsize, 1) > 0) {
+ while (getrec(&record, &recsize, true) > 0) {
x = execute(a[1]);
if (isexit(x))
break;
@@ -219,11 +223,11 @@ struct Frame { /* stack frame for awk function calls */
struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
int nframe = 0; /* number of frames allocated */
-struct Frame *fp = NULL; /* frame pointer. bottom level unused */
+struct Frame *frp = NULL; /* frame pointer. bottom level unused */
Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
{
- static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
+ static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
int i, ncall, ndef;
int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
Node *x;
@@ -236,25 +240,25 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
if (!isfcn(fcn))
FATAL("calling undefined function %s", s);
if (frame == NULL) {
- fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
+ frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
if (frame == NULL)
FATAL("out of space for stack frames calling %s", s);
}
for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
ncall++;
ndef = (int) fcn->fval; /* args in defn */
- dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
+ DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
if (ncall > ndef)
WARNING("function %s called with %d args, uses only %d",
s, ncall, ndef);
if (ncall + ndef > NARGS)
FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
- dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
+ DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
y = execute(x);
oargs[i] = y;
- dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
- i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
+ DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
+ i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
if (isfcn(y))
FATAL("can't use function %s as argument in %s", y->nval, s);
if (isarr(y))
@@ -267,26 +271,25 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
args[i] = gettemp();
*args[i] = newcopycell;
}
- fp++; /* now ok to up frame */
- if (fp >= frame + nframe) {
- int dfp = fp - frame; /* old index */
- frame = (struct Frame *)
- realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
+ frp++; /* now ok to up frame */
+ if (frp >= frame + nframe) {
+ int dfp = frp - frame; /* old index */
+ frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
if (frame == NULL)
FATAL("out of space for stack frames in %s", s);
- fp = frame + dfp;
+ frp = frame + dfp;
}
- fp->fcncell = fcn;
- fp->args = args;
- fp->nargs = ndef; /* number defined with (excess are locals) */
- fp->retval = gettemp();
+ frp->fcncell = fcn;
+ frp->args = args;
+ frp->nargs = ndef; /* number defined with (excess are locals) */
+ frp->retval = gettemp();
- dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+ DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
y = execute((Node *)(fcn->sval)); /* execute body */
- dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+ DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
for (i = 0; i < ndef; i++) {
- Cell *t = fp->args[i];
+ Cell *t = frp->args[i];
if (isarr(t)) {
if (t->csub == CCOPY) {
if (i >= ncall) {
@@ -315,9 +318,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
if (freed == 0) {
tempfree(y); /* don't free twice! */
}
- z = fp->retval; /* return value */
- dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
- fp--;
+ z = frp->retval; /* return value */
+ DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
+ frp--;
return(z);
}
@@ -344,11 +347,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */
{
n = ptoi(a[0]); /* argument number, counting from 0 */
- dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
- if (n+1 > fp->nargs)
+ DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
+ if (n+1 > frp->nargs)
FATAL("argument #%d of function %s was not supplied",
- n+1, fp->fcncell->nval);
- return fp->args[n];
+ n+1, frp->fcncell->nval);
+ return frp->args[n];
}
Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
@@ -367,14 +370,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
if (a[0] != NULL) {
y = execute(a[0]);
if ((y->tval & (STR|NUM)) == (STR|NUM)) {
- setsval(fp->retval, getsval(y));
- fp->retval->fval = getfval(y);
- fp->retval->tval |= NUM;
+ setsval(frp->retval, getsval(y));
+ frp->retval->fval = getfval(y);
+ frp->retval->tval |= NUM;
}
else if (y->tval & STR)
- setsval(fp->retval, getsval(y));
+ setsval(frp->retval, getsval(y));
else if (y->tval & NUM)
- setfval(fp->retval, getfval(y));
+ setfval(frp->retval, getfval(y));
else /* can't happen */
FATAL("bad type variable %d", y->tval);
tempfree(y);
@@ -403,6 +406,8 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
char *buf;
int bufsize = recsize;
int mode;
+ bool newflag;
+ double result;
if ((buf = (char *) malloc(bufsize)) == NULL)
FATAL("out of memory in getline");
@@ -414,38 +419,38 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
- fp = openfile(mode, getsval(x));
+ fp = openfile(mode, getsval(x), &newflag);
tempfree(x);
if (fp == NULL)
n = -1;
else
- n = readrec(&buf, &bufsize, fp);
+ n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) {
;
} else if (a[0] != NULL) { /* getline var <file */
x = execute(a[0]);
setsval(x, buf);
- if (is_number(x->sval)) {
- x->fval = atof(x->sval);
+ if (is_number(x->sval, & result)) {
+ x->fval = result;
x->tval |= NUM;
}
tempfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (is_number(fldtab[0]->sval, & result)) {
+ fldtab[0]->fval = result;
fldtab[0]->tval |= NUM;
}
}
} else { /* bare getline; use current input */
if (a[0] == NULL) /* getline */
- n = getrec(&record, &recsize, 1);
+ n = getrec(&record, &recsize, true);
else { /* getline var */
- n = getrec(&buf, &bufsize, 0);
+ n = getrec(&buf, &bufsize, false);
x = execute(a[0]);
setsval(x, buf);
- if (is_number(x->sval)) {
- x->fval = atof(x->sval);
+ if (is_number(x->sval, & result)) {
+ x->fval = result;
x->tval |= NUM;
}
tempfree(x);
@@ -458,38 +463,57 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
Cell *getnf(Node **a, int n) /* get NF */
{
- if (donefld == 0)
+ if (!donefld)
fldbld();
return (Cell *) a[0];
}
-Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
+static char *
+makearraystring(Node *p, const char *func)
{
- Cell *x, *y, *z;
- char *s;
- Node *np;
char *buf;
int bufsz = recsize;
- int nsub;
+ size_t blen;
- if ((buf = (char *) malloc(bufsz)) == NULL)
- FATAL("out of memory in array");
+ if ((buf = (char *) malloc(bufsz)) == NULL) {
+ FATAL("%s: out of memory", func);
+ }
- x = execute(a[0]); /* Cell* for symbol table */
- buf[0] = 0;
- for (np = a[1]; np; np = np->nnext) {
- y = execute(np); /* subscript */
- s = getsval(y);
- nsub = strlen(getsval(subseploc));
- if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
- FATAL("out of memory for %s[%s...]", x->nval, buf);
- strcat(buf, s);
- if (np->nnext)
- strcat(buf, *SUBSEP);
- tempfree(y);
+ blen = 0;
+ buf[blen] = '\0';
+
+ for (; p; p = p->nnext) {
+ Cell *x = execute(p); /* expr */
+ char *s = getsval(x);
+ size_t seplen = strlen(getsval(subseploc));
+ size_t nsub = p->nnext ? seplen : 0;
+ size_t slen = strlen(s);
+ size_t tlen = blen + slen + nsub;
+
+ if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
+ FATAL("%s: out of memory %s[%s...]",
+ func, x->nval, buf);
+ }
+ memcpy(buf + blen, s, slen);
+ if (nsub) {
+ memcpy(buf + blen + slen, *SUBSEP, nsub);
+ }
+ buf[tlen] = '\0';
+ blen = tlen;
+ tempfree(x);
}
+ return buf;
+}
+
+Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
+{
+ Cell *x, *z;
+ char *buf;
+
+ x = execute(a[0]); /* Cell* for symbol table */
+ buf = makearraystring(a[1], __func__);
if (!isarr(x)) {
- dprintf( ("making %s into an array\n", NN(x->nval)) );
+ DPRINTF("making %s into an array\n", NN(x->nval));
if (freeable(x))
xfree(x->sval);
x->tval &= ~(STR|NUM|DONTFREE);
@@ -506,36 +530,21 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
{
- Cell *x, *y;
- Node *np;
- char *s;
- int nsub;
+ Cell *x;
x = execute(a[0]); /* Cell* for symbol table */
+ if (x == symtabloc) {
+ FATAL("cannot delete SYMTAB or its elements");
+ }
if (!isarr(x))
return True;
- if (a[1] == 0) { /* delete the elements, not the table */
+ if (a[1] == NULL) { /* delete the elements, not the table */
freesymtab(x);
x->tval &= ~STR;
x->tval |= ARR;
x->sval = (char *) makesymtab(NSYMTAB);
} else {
- int bufsz = recsize;
- char *buf;
- if ((buf = (char *) malloc(bufsz)) == NULL)
- FATAL("out of memory in adelete");
- buf[0] = 0;
- for (np = a[1]; np; np = np->nnext) {
- y = execute(np); /* subscript */
- s = getsval(y);
- nsub = strlen(getsval(subseploc));
- if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
- FATAL("out of memory deleting %s[%s...]", x->nval, buf);
- strcat(buf, s);
- if (np->nnext)
- strcat(buf, *SUBSEP);
- tempfree(y);
- }
+ char *buf = makearraystring(a[1], __func__);
freeelem(x, buf);
free(buf);
}
@@ -545,37 +554,19 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
{
- Cell *x, *ap, *k;
- Node *p;
+ Cell *ap, *k;
char *buf;
- char *s;
- int bufsz = recsize;
- int nsub;
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {
- dprintf( ("making %s into an array\n", ap->nval) );
+ DPRINTF("making %s into an array\n", ap->nval);
if (freeable(ap))
xfree(ap->sval);
ap->tval &= ~(STR|NUM|DONTFREE);
ap->tval |= ARR;
ap->sval = (char *) makesymtab(NSYMTAB);
}
- if ((buf = (char *) malloc(bufsz)) == NULL) {
- FATAL("out of memory in intest");
- }
- buf[0] = 0;
- for (p = a[0]; p; p = p->nnext) {
- x = execute(p); /* expr */
- s = getsval(x);
- nsub = strlen(getsval(subseploc));
- if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
- FATAL("out of memory deleting %s[%s...]", x->nval, buf);
- strcat(buf, s);
- tempfree(x);
- if (p->nnext)
- strcat(buf, *SUBSEP);
- }
+ buf = makearraystring(a[0], __func__);
k = lookup(buf, (Array *) ap->sval);
tempfree(ap);
free(buf);
@@ -600,7 +591,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
}
x = execute(a[1]); /* a[1] = target text */
s = getsval(x);
- if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */
+ if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
i = (*mf)((fa *) a[2], s);
else {
y = execute(a[2]); /* a[2] = regular expr */
@@ -697,7 +688,7 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
void tfree(Cell *a) /* free a tempcell */
{
if (freeable(a)) {
- dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
+ DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
xfree(a->sval);
}
if (a == tmps)
@@ -711,12 +702,12 @@ Cell *gettemp(void) /* get a tempcell */
Cell *x;
if (!tmps) {
- tmps = (Cell *) calloc(100, sizeof(Cell));
+ tmps = (Cell *) calloc(100, sizeof(*tmps));
if (!tmps)
FATAL("out of space for temporaries");
- for(i = 1; i < 100; i++)
+ for (i = 1; i < 100; i++)
tmps[i-1].cnext = &tmps[i];
- tmps[i-1].cnext = 0;
+ tmps[i-1].cnext = NULL;
}
x = tmps;
tmps = x->cnext;
@@ -736,7 +727,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */
if ((Awkfloat)INT_MAX < val)
FATAL("trying to access out of range field %s", x->nval);
m = (int) val;
- if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
+ if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
/* BUG: can x->nval ever be null??? */
tempfree(x);
@@ -751,18 +742,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
int k, m, n;
char *s;
int temp;
- Cell *x, *y, *z = 0;
+ Cell *x, *y, *z = NULL;
x = execute(a[0]);
y = execute(a[1]);
- if (a[2] != 0)
+ if (a[2] != NULL)
z = execute(a[2]);
s = getsval(x);
k = strlen(s) + 1;
if (k <= 1) {
tempfree(x);
tempfree(y);
- if (a[2] != 0) {
+ if (a[2] != NULL) {
tempfree(z);
}
x = gettemp();
@@ -775,7 +766,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
else if (m > k)
m = k;
tempfree(y);
- if (a[2] != 0) {
+ if (a[2] != NULL) {
n = (int) getfval(z);
tempfree(z);
} else
@@ -784,7 +775,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
n = 0;
else if (n > k - m)
n = k - m;
- dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
+ DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
y = gettemp();
temp = s[n+m-1]; /* with thanks to John Linderman */
s[n+m-1] = '\0';
@@ -807,8 +798,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
z = gettemp();
for (p1 = s1; *p1 != '\0'; p1++) {
- for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
- ;
+ for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
+ continue;
if (*p2 == '\0') {
v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
break;
@@ -833,16 +824,18 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
int fmtsz = recsize;
char *buf = *pbuf;
int bufsize = *pbufsize;
+#define FMTSZ(a) (fmtsz - ((a) - fmt))
+#define BUFSZ(a) (bufsize - ((a) - buf))
- static int first = 1;
- static int have_a_format = 0;
+ static bool first = true;
+ static bool have_a_format = false;
if (first) {
- char buf[100];
+ char xbuf[100];
- sprintf(buf, "%a", 42.0);
- have_a_format = (strcmp(buf, "0x1.5p+5") == 0);
- first = 0;
+ snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
+ have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
+ first = false;
}
os = s;
@@ -868,8 +861,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
for (t = fmt; (*t++ = *s) != '\0'; s++) {
if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
FATAL("format item %.30s... ran format() out of memory", os);
- if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
- break; /* the ansi panoply */
+ /* Ignore size specifiers */
+ if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
+ t--;
+ continue;
+ }
+ if (isalpha((uschar)*s))
+ break;
if (*s == '$') {
FATAL("'$' not permitted in awk formats");
}
@@ -879,7 +877,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
}
x = execute(a);
a = a->nnext;
- sprintf(t-1, "%d", fmtwd=(int) getfval(x));
+ snprintf(t - 1, FMTSZ(t - 1),
+ "%d", fmtwd=(int) getfval(x));
if (fmtwd < 0)
fmtwd = -fmtwd;
adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
@@ -901,16 +900,12 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
case 'f': case 'e': case 'g': case 'E': case 'G':
flag = 'f';
break;
- case 'd': case 'i':
- flag = 'd';
- if(*(s-1) == 'l') break;
- *(t-1) = 'l';
- *t = 'd';
+ case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
+ flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
+ *(t-1) = 'j';
+ *t = *s;
*++t = '\0';
break;
- case 'o': case 'x': case 'X': case 'u':
- flag = *(s-1) == 'l' ? 'd' : 'u';
- break;
case 's':
flag = 's';
break;
@@ -931,20 +926,20 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
n = fmtwd;
adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
switch (flag) {
- case '?': sprintf(p, "%s", fmt); /* unknown, so dump it too */
+ case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
t = getsval(x);
n = strlen(t);
if (fmtwd > n)
n = fmtwd;
adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
p += strlen(p);
- sprintf(p, "%s", t);
+ snprintf(p, BUFSZ(p), "%s", t);
break;
case 'a':
case 'A':
- case 'f': sprintf(p, fmt, getfval(x)); break;
- case 'd': sprintf(p, fmt, (long) getfval(x)); break;
- case 'u': sprintf(p, fmt, (int) getfval(x)); break;
+ case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
+ case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
+ case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
case 's':
t = getsval(x);
n = strlen(t);
@@ -952,18 +947,18 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
n = fmtwd;
if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
- sprintf(p, fmt, t);
+ snprintf(p, BUFSZ(p), fmt, t);
break;
case 'c':
if (isnum(x)) {
- if (getfval(x))
- sprintf(p, fmt, (int) getfval(x));
+ if ((int)getfval(x))
+ snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
else {
*p++ = '\0'; /* explicit null byte */
*p = '\0'; /* next output will start here */
}
} else
- sprintf(p, fmt, getsval(x)[0]);
+ snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
break;
default:
FATAL("can't happen: bad conversion %c in format()", flag);
@@ -1074,13 +1069,15 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
case UMINUS:
i = -i;
break;
- case UPLUS: /* handled by getfval(), above */
+ case UPLUS: /* handled by getfval(), above */
break;
case POWER:
if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
i = ipow(i, (int) j);
- else
+ else {
+ errno = 0;
i = errcheck(pow(i, j), "pow");
+ }
break;
default: /* can't happen */
FATAL("illegal arithmetic operator %d", n);
@@ -1173,8 +1170,10 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
case POWEQ:
if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
xf = ipow(xf, (int) yf);
- else
+ else {
+ errno = 0;
xf = errcheck(pow(xf, yf), "pow");
+ }
break;
default:
FATAL("illegal assignment operator %d", n);
@@ -1194,13 +1193,14 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
x = execute(a[0]);
n1 = strlen(getsval(x));
- adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
- (void) strncpy(s, x->sval, ssz);
+ adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
+ memcpy(s, x->sval, n1);
y = execute(a[1]);
n2 = strlen(getsval(y));
adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
- (void) strncpy(s + n1, y->sval, ssz - n1);
+ memcpy(s + n1, y->sval, n2);
+ s[n1 + n2] = '\0';
tempfree(x);
tempfree(y);
@@ -1216,7 +1216,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
{
Cell *x;
- if (a[0] == 0)
+ if (a[0] == NULL)
x = execute(a[1]);
else {
x = execute(a[0]);
@@ -1253,21 +1253,23 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
{
- Cell *x = 0, *y, *ap;
- char *s, *origs;
- char *fs, *origfs = NULL;
+ Cell *x = NULL, *y, *ap;
+ const char *s, *origs, *t;
+ const char *fs = NULL;
+ char *origfs = NULL;
int sep;
- char *t, temp, num[50];
+ char temp, num[50];
int n, tempstat, arg3type;
+ double result;
y = execute(a[0]); /* source string */
origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]);
- if (a[2] == 0) /* fs string */
+ if (a[2] == NULL) /* fs string */
fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]);
- origfs = fs = strdup(getsval(x));
+ fs = origfs = strdup(getsval(x));
tempfree(x);
} else if (arg3type == REGEXPR)
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
@@ -1276,7 +1278,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
sep = *fs;
ap = execute(a[1]); /* array name */
freesymtab(ap);
- dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
+ DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
ap->tval &= ~STR;
ap->tval |= ARR;
ap->sval = (char *) makesymtab(NSYMTAB);
@@ -1300,18 +1302,18 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
pfa->initstat = 2;
do {
n++;
- sprintf(num, "%d", n);
+ snprintf(num, sizeof(num), "%d", n);
temp = *patbeg;
- *patbeg = '\0';
- if (is_number(s))
- setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
+ setptr(patbeg, '\0');
+ if (is_number(s, & result))
+ setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
- *patbeg = temp;
+ setptr(patbeg, temp);
s = patbeg + patlen;
- if (*(patbeg+patlen-1) == 0 || *s == 0) {
+ if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
n++;
- sprintf(num, "%d", n);
+ snprintf(num, sizeof(num), "%d", n);
setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
pfa->initstat = tempstat;
goto spdone;
@@ -1321,69 +1323,70 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
/* cf gsub and refldbld */
}
n++;
- sprintf(num, "%d", n);
- if (is_number(s))
- setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
+ snprintf(num, sizeof(num), "%d", n);
+ if (is_number(s, & result))
+ setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
spdone:
pfa = NULL;
} else if (sep == ' ') {
for (n = 0; ; ) {
- while (*s == ' ' || *s == '\t' || *s == '\n')
+#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
+ while (ISWS(*s))
s++;
- if (*s == 0)
+ if (*s == '\0')
break;
n++;
t = s;
do
s++;
- while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
+ while (*s != '\0' && !ISWS(*s));
temp = *s;
- *s = '\0';
- sprintf(num, "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ setptr(s, '\0');
+ snprintf(num, sizeof(num), "%d", n);
+ if (is_number(t, & result))
+ setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
- *s = temp;
- if (*s != 0)
+ setptr(s, temp);
+ if (*s != '\0')
s++;
}
} else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
- for (n = 0; *s != 0; s++) {
+ for (n = 0; *s != '\0'; s++) {
char buf[2];
n++;
- sprintf(num, "%d", n);
+ snprintf(num, sizeof(num), "%d", n);
buf[0] = *s;
- buf[1] = 0;
+ buf[1] = '\0';
if (isdigit((uschar)buf[0]))
setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
else
setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
}
- } else if (*s != 0) {
+ } else if (*s != '\0') {
for (;;) {
n++;
t = s;
while (*s != sep && *s != '\n' && *s != '\0')
s++;
temp = *s;
- *s = '\0';
- sprintf(num, "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ setptr(s, '\0');
+ snprintf(num, sizeof(num), "%d", n);
+ if (is_number(t, & result))
+ setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
- *s = temp;
- if (*s++ == 0)
+ setptr(s, temp);
+ if (*s++ == '\0')
break;
}
}
tempfree(ap);
tempfree(y);
- free(origs);
- free(origfs);
+ xfree(origs);
+ xfree(origfs);
x = gettemp();
x->tval = NUM;
x->fval = n;
@@ -1413,7 +1416,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
if (istrue(x)) {
tempfree(x);
x = execute(a[1]);
- } else if (a[2] != 0) {
+ } else if (a[2] != NULL) {
tempfree(x);
x = execute(a[2]);
}
@@ -1465,7 +1468,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
x = execute(a[0]);
tempfree(x);
for (;;) {
- if (a[1]!=0) {
+ if (a[1]!=NULL) {
x = execute(a[1]);
if (!istrue(x)) return(x);
else tempfree(x);
@@ -1513,17 +1516,100 @@ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
return True;
}
+static char *nawk_convert(const char *s, int (*fun_c)(int),
+ wint_t (*fun_wc)(wint_t)) /* copy s, mapping each char via fun_c or fun_wc */
+{
+ char *buf = NULL;
+ char *pbuf = NULL; /* write cursor in buf */
+ const char *ps = NULL; /* read cursor in s */
+ size_t n = 0;
+ wchar_t wc;
+ size_t sz = MB_CUR_MAX; /* max bytes per character in current locale */
+
+ if (sz == 1) { /* single-byte locale: map byte by byte with fun_c */
+ buf = tostring(s); /* presumably a writable copy of s - confirm in tran.c */
+
+ for (pbuf = buf; *pbuf; pbuf++)
+ *pbuf = fun_c((uschar)*pbuf);
+
+ return buf;
+ } else {
+ /* upper/lower character may be shorter/longer */
+ buf = tostringN(s, strlen(s) * sz + 1); /* sized for sz bytes per input byte plus NUL */
+
+ (void) mbtowc(NULL, NULL, 0); /* reset internal state */
+ /*
+ * Reset internal state here too.
+ * Assign result to avoid a compiler warning. (Casting to void
+ * doesn't work.)
+ * Increment said variable to avoid a different warning.
+ */
+ int unused = wctomb(NULL, L'\0');
+ unused++;
+
+ ps = s;
+ pbuf = buf;
+ while (n = mbtowc(&wc, ps, sz), /* decode one character from ps */
+ n > 0 && n != (size_t)-1 && n != (size_t)-2) /* stop at NUL (0) or a decode error */
+ {
+ ps += n;
+
+ n = wctomb(pbuf, fun_wc(wc)); /* encode the mapped character at pbuf */
+ if (n == (size_t)-1)
+ FATAL("illegal wide character %s", s);
+
+ pbuf += n;
+ }
+
+ *pbuf = '\0';
+
+ if (n) /* n is the last mbtowc result: nonzero means the loop ended on an error */
+ FATAL("illegal byte sequence %s", s);
+
+ return buf;
+ }
+}
+
+#ifdef __DJGPP__
+static wint_t towupper(wint_t wc) /* stand-in compiled only when __DJGPP__ is defined */
+{
+ if (wc >= 0 && wc < 256) /* only values 0..255 go through toupper() */
+ return toupper(wc & 0xFF);
+
+ return wc; /* anything else is returned unchanged */
+}
+
+static wint_t towlower(wint_t wc) /* stand-in compiled only when __DJGPP__ is defined */
+{
+ if (wc >= 0 && wc < 256) /* only values 0..255 go through tolower() */
+ return tolower(wc & 0xFF);
+
+ return wc; /* anything else is returned unchanged */
+}
+#endif
+
+static char *nawk_toupper(const char *s) /* upper-cased copy of s via nawk_convert() */
+{
+ return nawk_convert(s, toupper, towupper);
+}
+
+static char *nawk_tolower(const char *s) /* lower-cased copy of s via nawk_convert() */
+{
+ return nawk_convert(s, tolower, towlower);
+}
+
Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
{
Cell *x, *y;
Awkfloat u;
- int t;
+ int t, sz;
Awkfloat tmp;
- char *p, *buf;
+ char *buf, *fmt;
Node *nextarg;
FILE *fp;
- void flush_all(void);
int status = 0;
+ time_t tv;
+ struct tm *tm;
t = ptoi(a[0]);
x = execute(a[1]);
@@ -1536,19 +1622,25 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
u = strlen(getsval(x));
break;
case FLOG:
- u = errcheck(log(getfval(x)), "log"); break;
+ errno = 0;
+ u = errcheck(log(getfval(x)), "log");
+ break;
case FINT:
modf(getfval(x), &u); break;
case FEXP:
- u = errcheck(exp(getfval(x)), "exp"); break;
+ errno = 0;
+ u = errcheck(exp(getfval(x)), "exp");
+ break;
case FSQRT:
- u = errcheck(sqrt(getfval(x)), "sqrt"); break;
+ errno = 0;
+ u = errcheck(sqrt(getfval(x)), "sqrt");
+ break;
case FSIN:
u = sin(getfval(x)); break;
case FCOS:
u = cos(getfval(x)); break;
case FATAN:
- if (nextarg == 0) {
+ if (nextarg == NULL) {
WARNING("atan2 requires two arguments; returning 1.0");
u = 1.0;
} else {
@@ -1558,6 +1650,64 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
nextarg = nextarg->nnext;
}
break;
+ case FCOMPL:
+ u = ~((int)getfval(x));
+ break;
+ case FAND:
+ if (nextarg == 0) {
+ WARNING("and requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) & ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FFOR:
+ if (nextarg == 0) {
+ WARNING("or requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) | ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FXOR:
+ if (nextarg == 0) {
+ WARNING("xor requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) ^ ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FLSHIFT:
+ if (nextarg == 0) {
+ WARNING("lshift requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) << ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FRSHIFT:
+ if (nextarg == 0) {
+ WARNING("rshift requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) >> ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
status = system(getsval(x));
@@ -1576,8 +1726,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
}
break;
case FRAND:
- /* in principle, rand() returns something in 0..RAND_MAX */
- u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
+ /* random() returns numbers in [0..2^31-1]
+ * in order to get a number in [0, 1), divide it by 2^31
+ */
+ u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
break;
case FSRAND:
if (isrec(x)) /* no argument provided */
@@ -1585,22 +1737,16 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else
u = getfval(x);
tmp = u;
- srand((unsigned int) u);
+ srandom((unsigned long) u);
u = srand_seed;
srand_seed = tmp;
break;
case FTOUPPER:
case FTOLOWER:
- buf = tostring(getsval(x));
- if (t == FTOUPPER) {
- for (p = buf; *p; p++)
- if (islower((uschar) *p))
- *p = toupper((uschar)*p);
- } else {
- for (p = buf; *p; p++)
- if (isupper((uschar) *p))
- *p = tolower((uschar)*p);
- }
+ if (t == FTOUPPER)
+ buf = nawk_toupper(getsval(x));
+ else
+ buf = nawk_tolower(getsval(x));
tempfree(x);
x = gettemp();
setsval(x, buf);
@@ -1610,11 +1756,46 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
- } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+ } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = EOF;
else
u = fflush(fp);
break;
+ case FSYSTIME:
+ u = time((time_t *) 0);
+ break;
+ case FSTRFTIME:
+ /* strftime([format [,timestamp]]) */
+ if (nextarg) {
+ y = execute(nextarg);
+ nextarg = nextarg->nnext;
+ tv = (time_t) getfval(y);
+ tempfree(y);
+ } else
+ tv = time((time_t *) 0);
+ tm = localtime(&tv);
+ if (tm == NULL)
+ FATAL("bad time %ld", (long)tv);
+
+ if (isrec(x)) {
+ /* format argument not provided, use default */
+ fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
+ } else
+ fmt = tostring(getsval(x));
+
+ sz = 32;
+ buf = NULL;
+ do {
+ if ((buf = realloc(buf, (sz *= 2))) == NULL)
+ FATAL("out of memory in strftime");
+ } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
+
+ y = gettemp();
+ setsval(y, buf);
+ free(fmt);
+ free(buf);
+
+ return y;
default: /* can't happen */
FATAL("illegal function type %d", t);
break;
@@ -1622,7 +1803,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
tempfree(x);
x = gettemp();
setfval(x, u);
- if (nextarg != 0) {
+ if (nextarg != NULL) {
WARNING("warning: function has too many arguments");
for ( ; nextarg; nextarg = nextarg->nnext)
execute(nextarg);
@@ -1636,7 +1817,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */
Cell *y;
FILE *fp;
- if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
+ if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
fp = stdout;
else
fp = redirect(ptoi(a[1]), a[2]);
@@ -1649,7 +1830,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */
else
fputs(getsval(ofsloc), fp);
}
- if (a[1] != 0)
+ if (a[1] != NULL)
fflush(fp);
if (ferror(fp))
FATAL("write error on %s", filename(fp));
@@ -1670,7 +1851,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
x = execute(b);
fname = getsval(x);
- fp = openfile(a, fname);
+ fp = openfile(a, fname, NULL);
if (fp == NULL)
FATAL("can't open file %s", fname);
tempfree(x);
@@ -1683,14 +1864,14 @@ struct files {
int mode; /* '|', 'a', 'w' => LE/LT, GT */
} *files;
-int nfiles;
+size_t nfiles;
-void stdinit(void) /* in case stdin, etc., are not constants */
+static void stdinit(void) /* in case stdin, etc., are not constants */
{
nfiles = FOPEN_MAX;
- files = calloc(nfiles, sizeof(*files));
+ files = (struct files *) calloc(nfiles, sizeof(*files));
if (files == NULL)
- FATAL("can't allocate file memory for %u files", nfiles);
+ FATAL("can't allocate file memory for %zu files", nfiles);
files[0].fp = stdin;
files[0].fname = "/dev/stdin";
files[0].mode = LT;
@@ -1702,33 +1883,35 @@ void stdinit(void) /* in case stdin, etc., are not constants */
files[2].mode = GT;
}
-FILE *openfile(int a, const char *us)
+FILE *openfile(int a, const char *us, bool *pnewflag)
{
const char *s = us;
- int i, m;
- FILE *fp = 0;
+ size_t i;
+ int m;
+ FILE *fp = NULL;
if (*s == '\0')
FATAL("null file name in print or getline");
- for (i=0; i < nfiles; i++)
- if (files[i].fname && strcmp(s, files[i].fname) == 0) {
- if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
- return files[i].fp;
- if (a == FFLUSH)
- return files[i].fp;
+ for (i = 0; i < nfiles; i++)
+ if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
+ (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
+ a == FFLUSH)) {
+ if (pnewflag)
+ *pnewflag = false;
+ return files[i].fp;
}
if (a == FFLUSH) /* didn't find it, so don't create it! */
return NULL;
- for (i=0; i < nfiles; i++)
- if (files[i].fp == 0)
+ for (i = 0; i < nfiles; i++)
+ if (files[i].fp == NULL)
break;
if (i >= nfiles) {
struct files *nf;
- int nnf = nfiles + FOPEN_MAX;
- nf = realloc(files, nnf * sizeof(*nf));
+ size_t nnf = nfiles + FOPEN_MAX;
+ nf = (struct files *) realloc(files, nnf * sizeof(*nf));
if (nf == NULL)
- FATAL("cannot grow files for %s and %d files", s, nnf);
+ FATAL("cannot grow files for %s and %zu files", s, nnf);
memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
nfiles = nnf;
files = nf;
@@ -1752,13 +1935,17 @@ FILE *openfile(int a, const char *us)
files[i].fname = tostring(s);
files[i].fp = fp;
files[i].mode = m;
+ if (pnewflag)
+ *pnewflag = true;
+ if (fp != stdin && fp != stdout && fp != stderr)
+ (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
}
return fp;
}
const char *filename(FILE *fp)
{
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
if (fp == files[i].fp)
@@ -1766,70 +1953,80 @@ const char *filename(FILE *fp)
return "???";
}
-Cell *closefile(Node **a, int n)
-{
- Cell *x;
- int i, stat;
-
- x = execute(a[0]);
- getsval(x);
- stat = -1;
- for (i = 0; i < nfiles; i++) {
- if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
- if (ferror(files[i].fp))
- WARNING( "i/o error occurred on %s", files[i].fname );
- if (files[i].mode == '|' || files[i].mode == LE)
- stat = pclose(files[i].fp);
- else
- stat = fclose(files[i].fp);
- if (stat == EOF)
- WARNING( "i/o error occurred closing %s", files[i].fname );
- if (i > 2) /* don't do /dev/std... */
- xfree(files[i].fname);
- files[i].fname = NULL; /* watch out for ref thru this */
- files[i].fp = NULL;
- }
- }
- tempfree(x);
- x = gettemp();
- setfval(x, (Awkfloat) stat);
- return(x);
-}
+ Cell *closefile(Node **a, int n) /* close the files[] entry named by a[0]; result is 0 or -1 */
+ {
+ Cell *x;
+ size_t i;
+ bool stat; /* true means "failed" */
+
+ x = execute(a[0]);
+ getsval(x); /* result unused; x->sval is read below (presumably refreshed here) */
+ stat = true; /* stays true, so the result is -1, if no entry matches */
+ for (i = 0; i < nfiles; i++) {
+ if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
+ continue; /* unused slot or a different name */
+ if (ferror(files[i].fp))
+ FATAL("i/o error occurred on %s", files[i].fname);
+ if (files[i].fp == stdin || files[i].fp == stdout ||
+ files[i].fp == stderr)
+ stat = freopen("/dev/null", "r+", files[i].fp) == NULL; /* std streams are reopened on /dev/null, not closed */
+ else if (files[i].mode == '|' || files[i].mode == LE)
+ stat = pclose(files[i].fp) == -1; /* these two modes are closed with pclose() */
+ else
+ stat = fclose(files[i].fp) == EOF;
+ if (stat)
+ FATAL("i/o error occurred closing %s", files[i].fname);
+ if (i > 2) /* don't do /dev/std... */
+ xfree(files[i].fname);
+ files[i].fname = NULL; /* watch out for ref thru this */
+ files[i].fp = NULL;
+ break; /* only the first matching entry is handled */
+ }
+ tempfree(x);
+ x = gettemp();
+ setfval(x, (Awkfloat) (stat ? -1 : 0));
+ return(x);
+ }
void closeall(void)
{
- int i, stat;
-
- for (i = 0; i < FOPEN_MAX; i++) {
- if (files[i].fp) {
- if (ferror(files[i].fp))
- WARNING( "i/o error occurred on %s", files[i].fname );
- if (files[i].mode == '|' || files[i].mode == LE)
- stat = pclose(files[i].fp);
- else
- stat = fclose(files[i].fp);
- if (stat == EOF)
- WARNING( "i/o error occurred while closing %s", files[i].fname );
- }
+ size_t i;
+ bool stat = false; /* set true when a close or flush reports failure */
+
+ for (i = 0; i < nfiles; i++) { /* every slot in files[], bounded by nfiles */
+ if (! files[i].fp)
+ continue; /* slot has no stream */
+ if (ferror(files[i].fp))
+ FATAL( "i/o error occurred on %s", files[i].fname );
+ if (files[i].fp == stdin)
+ continue; /* stdin is neither flushed nor closed here */
+ if (files[i].mode == '|' || files[i].mode == LE)
+ stat = pclose(files[i].fp) == -1;
+ else if (files[i].fp == stdout || files[i].fp == stderr)
+ stat = fflush(files[i].fp) == EOF; /* stdout/stderr are flushed, not closed */
+ else
+ stat = fclose(files[i].fp) == EOF;
+ if (stat)
+ FATAL( "i/o error occurred while closing %s", files[i].fname );
}
}
-void flush_all(void)
+static void flush_all(void) /* fflush every files[] entry whose fp is set */
{
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
if (files[i].fp)
fflush(files[i].fp);
}
-void backsub(char **pb_ptr, char **sptr_ptr);
+void backsub(char **pb_ptr, const char **sptr_ptr);
Cell *sub(Node **a, int nnn) /* substitute command */
{
- char *sptr, *pb, *q;
+ const char *sptr, *q;
Cell *x, *y, *result;
- char *t, *buf;
+ char *t, *buf, *pb;
fa *pfa;
int bufsz = recsize;
@@ -1837,7 +2034,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
@@ -1853,7 +2050,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
while (sptr < patbeg)
*pb++ = *sptr++;
sptr = getsval(y);
- while (*sptr != 0) {
+ while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
@@ -1871,13 +2068,13 @@ Cell *sub(Node **a, int nnn) /* substitute command */
sptr = patbeg + patlen;
if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
- while ((*pb++ = *sptr++) != 0)
- ;
+ while ((*pb++ = *sptr++) != '\0')
+ continue;
}
if (pb > buf + bufsz)
FATAL("sub result2 %.30s too big; can't happen", buf);
setsval(x, buf); /* BUG: should be able to avoid copy */
- result = True;;
+ result = True;
}
tempfree(x);
tempfree(y);
@@ -1888,7 +2085,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */
Cell *gsub(Node **a, int nnn) /* global substitute */
{
Cell *x, *y;
- char *rptr, *sptr, *t, *pb, *q;
+ char *rptr, *pb;
+ const char *q, *t, *sptr;
char *buf;
fa *pfa;
int mflag, tempstat, num;
@@ -1900,7 +2098,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
num = 0;
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
@@ -1914,11 +2112,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
pb = buf;
rptr = getsval(y);
do {
- if (patlen == 0 && *patbeg != 0) { /* matched empty string */
+ if (patlen == 0 && *patbeg != '\0') { /* matched empty string */
if (mflag == 0) { /* can replace empty */
num++;
sptr = rptr;
- while (*sptr != 0) {
+ while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
@@ -1931,7 +2129,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
*pb++ = *sptr++;
}
}
- if (*t == 0) /* at end */
+ if (*t == '\0') /* at end */
goto done;
adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
*pb++ = *t++;
@@ -1946,7 +2144,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
while (sptr < patbeg)
*pb++ = *sptr++;
sptr = rptr;
- while (*sptr != 0) {
+ while (*sptr != '\0') {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
@@ -1959,7 +2157,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
*pb++ = *sptr++;
}
t = patbeg + patlen;
- if (patlen == 0 || *t == 0 || *(t-1) == 0)
+ if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
goto done;
if (pb > buf + bufsz)
FATAL("gsub result1 %.30s too big; can't happen", buf);
@@ -1968,8 +2166,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
} while (pmatch(pfa,t));
sptr = t;
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
- while ((*pb++ = *sptr++) != 0)
- ;
+ while ((*pb++ = *sptr++) != '\0')
+ continue;
done: if (pb < buf + bufsz)
*pb = '\0';
else if (*(pb-1) != '\0')
@@ -1986,9 +2184,158 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
return(x);
}
-void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */
+Cell *gensub(Node **a, int nnn) /* global selective substitute */
+ /* XXX incomplete - doesn't support backreferences \0 ... \9 */
+{
+ Cell *x, *y, *res, *h;
+ char *rptr;
+ const char *sptr;
+ char *buf, *pb;
+ const char *t, *q;
+ fa *pfa;
+ int mflag, tempstat, num, whichm;
+ int bufsz = recsize;
+
+ if ((buf = malloc(bufsz)) == NULL)
+ FATAL("out of memory in gensub");
+ mflag = 0; /* if mflag == 0, can replace empty string */
+ num = 0; /* matches counted so far */
+ x = execute(a[4]); /* source string */
+ t = getsval(x);
+ res = copycell(x); /* target string - initially copy of source */
+ res->csub = CTEMP; /* result values are temporary */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
+ pfa = (fa *) a[1]; /* regular expression */
+ else {
+ y = execute(a[1]);
+ pfa = makedfa(getsval(y), 1);
+ tempfree(y);
+ }
+ y = execute(a[2]); /* replacement string */
+ h = execute(a[3]); /* which matches should be replaced */
+ sptr = getsval(h);
+ if (sptr[0] == 'g' || sptr[0] == 'G')
+ whichm = -1; /* negative: no match is skipped below */
+ else {
+ /*
+ * The specified number is index of replacement, starting
+ * from 1. GNU awk treats index lower than 0 same as
+ * 1, we do same for compatibility.
+ */
+ whichm = (int) getfval(h) - 1;
+ if (whichm < 0)
+ whichm = 0;
+ }
+ tempfree(h);
+
+ if (pmatch(pfa, t)) {
+ char *sl;
+
+ tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ pb = buf;
+ rptr = getsval(y);
+ /*
+ * XXX if there are any backreferences in subst string,
+ * complain now.
+ */
+ for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
+ if (strchr("0123456789", sl[1])) {
+ FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
+ }
+ }
+
+ do {
+ if (whichm >= 0 && whichm != num) {
+ num++;
+ adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
+
+ /* copy the part of string up to and including
+ * match to output buffer */
+ while (t < patbeg + patlen)
+ *pb++ = *t++;
+ continue;
+ }
+
+ if (patlen == 0 && *patbeg != 0) { /* matched empty string */
+ if (mflag == 0) { /* can replace empty */
+ num++;
+ sptr = rptr;
+ while (*sptr != 0) {
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') {
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ }
+ if (*t == 0) /* at end */
+ goto done;
+ adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
+ *pb++ = *t++;
+ if (pb > buf + bufsz) /* BUG: not sure of this test */
+ FATAL("gensub result0 %.30s too big; can't happen", buf);
+ mflag = 0;
+ }
+ else { /* matched nonempty string */
+ num++;
+ sptr = t;
+ adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
+ while (sptr < patbeg)
+ *pb++ = *sptr++;
+ sptr = rptr;
+ while (*sptr != 0) {
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') {
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ t = patbeg + patlen;
+ if (patlen == 0 || *t == 0 || *(t-1) == 0)
+ goto done;
+ if (pb > buf + bufsz)
+ FATAL("gensub result1 %.30s too big; can't happen", buf);
+ mflag = 1;
+ }
+ } while (pmatch(pfa,t));
+ sptr = t;
+ adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
+ while ((*pb++ = *sptr++) != 0) /* copies the tail including its NUL; pb ends one past it */
+ ;
+ done: if (pb > buf + bufsz || (pb == buf + bufsz && *(pb-1) != '\0'))
+ FATAL("gensub result2 %.30s too big; can't happen", buf);
+ if (pb < buf + bufsz) *pb = '\0'; /* never store at buf[bufsz]; a full buffer already ends in NUL (cf. gsub) */
+ setsval(res, buf);
+ pfa->initstat = tempstat;
+ }
+ tempfree(x);
+ tempfree(y);
+ free(buf);
+ return(res);
+}
+
+void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
- char *pb = *pb_ptr, *sptr = *sptr_ptr;
+ char *pb = *pb_ptr;
+ const char *sptr = *sptr_ptr;
+ static bool first = true;
+ static bool do_posix = false;
+
+ if (first) {
+ first = false;
+ do_posix = (getenv("POSIXLY_CORRECT") != NULL);
+ }
if (sptr[1] == '\\') {
if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
@@ -1998,6 +2345,9 @@ void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */
} else if (sptr[2] == '&') { /* \\& -> \ + matched */
*pb++ = '\\';
sptr += 2;
+ } else if (do_posix) { /* \\x -> \x */
+ sptr++;
+ *pb++ = *sptr++;
} else { /* \\x -> \\x */
*pb++ = *sptr++;
*pb++ = *sptr++;
diff --git a/tran.c b/tran.c
index d1dfe2b2f176..c6ae890c8a91 100644
--- a/tran.c
+++ b/tran.c
@@ -29,7 +29,6 @@ THIS SOFTWARE.
#include <string.h>
#include <stdlib.h>
#include "awk.h"
-#include "ytab.h"
#define FULLTAB 2 /* rehash when table gets this x full */
#define GROWTAB 4 /* grow table by this factor */
@@ -114,6 +113,7 @@ void syminit(void) /* initialize symbol table with builtin vars */
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
RLENGTH = &rlengthloc->fval;
symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
+ free(symtabloc->sval);
symtabloc->sval = (char *) symtab;
}
@@ -126,11 +126,14 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
+ free(cp->sval);
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
+ double result;
+
sprintf(temp, "%d", i);
- if (is_number(*av))
- setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
+ if (is_number(*av, & result))
+ setsymtab(temp, *av, result, STR|NUM, ARGVtab);
else
setsymtab(temp, *av, 0.0, STR, ARGVtab);
av++;
@@ -144,15 +147,18 @@ void envinit(char **envp) /* set up ENVIRON variable */
cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
ENVtab = makesymtab(NSYMTAB);
+ free(cp->sval);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
+ double result;
+
if ((p = strchr(*envp, '=')) == NULL)
continue;
if( p == *envp ) /* no left hand side name in env string */
continue;
*p++ = 0; /* split into two strings at = */
- if (is_number(p))
- setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
+ if (is_number(p, & result))
+ setsymtab(*envp, p, result, STR|NUM, ENVtab);
else
setsymtab(*envp, p, 0.0, STR, ENVtab);
p[-1] = '='; /* restore in case env is passed down to a shell */
@@ -164,8 +170,8 @@ Array *makesymtab(int n) /* make a new symbol table */
Array *ap;
Cell **tp;
- ap = (Array *) malloc(sizeof(Array));
- tp = (Cell **) calloc(n, sizeof(Cell *));
+ ap = (Array *) malloc(sizeof(*ap));
+ tp = (Cell **) calloc(n, sizeof(*tp));
if (ap == NULL || tp == NULL)
FATAL("out of space in makesymtab");
ap->nelem = 0;
@@ -191,10 +197,10 @@ void freesymtab(Cell *ap) /* free a symbol table */
if (freeable(cp))
xfree(cp->sval);
temp = cp->cnext; /* avoids freeing then using */
- free(cp);
+ free(cp);
tp->nelem--;
}
- tp->tab[i] = 0;
+ tp->tab[i] = NULL;
}
if (tp->nelem != 0)
WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
@@ -207,7 +213,7 @@ void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */
Array *tp;
Cell *p, *prev = NULL;
int h;
-
+
tp = (Array *) ap->sval;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
@@ -231,11 +237,11 @@ Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
Cell *p;
if (n != NULL && (p = lookup(n, tp)) != NULL) {
- dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
- (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
+ DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
+ (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval);
return(p);
}
- p = (Cell *) malloc(sizeof(Cell));
+ p = (Cell *) malloc(sizeof(*p));
if (p == NULL)
FATAL("out of space for symbol table at %s", n);
p->nval = tostring(n);
@@ -250,8 +256,8 @@ Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
h = hash(n, tp->size);
p->cnext = tp->tab[h];
tp->tab[h] = p;
- dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
- (void*)p, p->nval, p->sval, p->fval, p->tval) );
+ DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
+ (void*)p, p->nval, p->sval, p->fval, p->tval);
return(p);
}
@@ -270,7 +276,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */
Cell *cp, *op, **np;
nsz = GROWTAB * tp->size;
- np = (Cell **) calloc(nsz, sizeof(Cell *));
+ np = (Cell **) calloc(nsz, sizeof(*np));
if (np == NULL) /* can't do it, but can keep running. */
return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) {
@@ -303,23 +309,24 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
int fldno;
f += 0.0; /* normalise negative zero to positive zero */
- if ((vp->tval & (NUM | STR)) == 0)
+ if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
- donerec = 0; /* mark $0 invalid */
+ donerec = false; /* mark $0 invalid */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %g\n", fldno, f) );
+ DPRINTF("setting field %d to %g\n", fldno, f);
} else if (&vp->fval == NF) {
- donerec = 0; /* mark $0 invalid */
+ donerec = false; /* mark $0 invalid */
setlastfld(f);
- dprintf( ("setting NF to %g\n", f) );
+ DPRINTF("setting NF to %g\n", f);
} else if (isrec(vp)) {
- donefld = 0; /* mark $1... invalid */
- donerec = 1;
+ donefld = false; /* mark $1... invalid */
+ donerec = true;
+ savefs();
} else if (vp == ofsloc) {
- if (donerec == 0)
+ if (!donerec)
recbld();
}
if (freeable(vp))
@@ -329,7 +336,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
vp->tval |= NUM; /* mark number ok */
if (f == -0) /* who would have thought this possible? */
f = 0;
- dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
+ DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval);
return vp->fval = f;
}
@@ -340,7 +347,7 @@ void funnyvar(Cell *vp, const char *rw)
if (vp->tval & FCN)
FATAL("can't %s %s; it's a function.", rw, vp->nval);
WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
- vp, vp->nval, vp->sval, vp->fval, vp->tval);
+ (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval);
}
char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
@@ -349,21 +356,22 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
int fldno;
Awkfloat f;
- dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
- (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
+ DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
+ (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
- donerec = 0; /* mark $0 invalid */
+ donerec = false; /* mark $0 invalid */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
+ DPRINTF("setting field %d to %s (%p)\n", fldno, s, (const void*)s);
} else if (isrec(vp)) {
- donefld = 0; /* mark $1... invalid */
- donerec = 1;
+ donefld = false; /* mark $1... invalid */
+ donerec = true;
+ savefs();
} else if (vp == ofsloc) {
- if (donerec == 0)
+ if (!donerec)
recbld();
}
t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
@@ -373,14 +381,14 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
vp->tval |= STR;
vp->fmt = NULL;
setfree(vp);
- dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
- (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
+ DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
+ (void*)vp, NN(vp->nval), t, (void*)t, vp->tval, donerec, donefld);
vp->sval = t;
if (&vp->fval == NF) {
- donerec = 0; /* mark $0 invalid */
+ donerec = false; /* mark $0 invalid */
f = getfval(vp);
setlastfld(f);
- dprintf( ("setting NF to %g\n", f) );
+ DPRINTF("setting NF to %g\n", f);
}
return(vp->sval);
@@ -390,30 +398,47 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
{
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
- if (isfld(vp) && donefld == 0)
+ if (isfld(vp) && !donefld)
fldbld();
- else if (isrec(vp) && donerec == 0)
+ else if (isrec(vp) && !donerec)
recbld();
if (!isnum(vp)) { /* not a number */
- vp->fval = atof(vp->sval); /* best guess */
- if (is_number(vp->sval) && !(vp->tval&CON))
- vp->tval |= NUM; /* make NUM only sparingly */
+ double fval;
+ bool no_trailing;
+
+ if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {
+ vp->fval = fval;
+ if (no_trailing && !(vp->tval&CON))
+ vp->tval |= NUM; /* make NUM only sparingly */
+ } else
+ vp->fval = 0.0;
}
- dprintf( ("getfval %p: %s = %g, t=%o\n",
- (void*)vp, NN(vp->nval), vp->fval, vp->tval) );
+ DPRINTF("getfval %p: %s = %g, t=%o\n",
+ (void*)vp, NN(vp->nval), vp->fval, vp->tval);
return(vp->fval);
}
+static const char *get_inf_nan(double d) /* signed name for an inf or nan value, NULL for anything else */
+{
+ if (isinf(d)) {
+ return (d < 0 ? "-inf" : "+inf");
+ } else if (isnan(d)) {
+ return (signbit(d) != 0 ? "-nan" : "+nan"); /* a nan never compares < 0, so its sign is read with signbit() */
+ } else
+ return NULL; /* finite value: no special name */
+}
+
static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
{
char s[256];
double dtemp;
+ const char *p;
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
- if (isfld(vp) && donefld == 0)
+ if (isfld(vp) && ! donefld)
fldbld();
- else if (isrec(vp) && donerec == 0)
+ else if (isrec(vp) && ! donerec)
recbld();
/*
@@ -444,7 +469,9 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
{ \
if (freeable(vp)) \
xfree(vp->sval); \
- if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
+ if ((p = get_inf_nan(vp->fval)) != NULL) \
+ strcpy(s, p); \
+ else if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
snprintf(s, sizeof (s), "%.30g", vp->fval); \
else \
snprintf(s, sizeof (s), *fmt, vp->fval); \
@@ -487,8 +514,8 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
}
}
done:
- dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
- (void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
+ DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n",
+ (void*)vp, NN(vp->nval), vp->sval, (void*)vp->sval, vp->tval);
return(vp->sval);
}
@@ -505,20 +532,53 @@ char *getpssval(Cell *vp) /* get string val of a Cell for print */
char *tostring(const char *s) /* make a copy of string s */
{
+ char *p = strdup(s); /* heap-allocated duplicate; NULL when allocation fails */
+ if (p == NULL)
+ FATAL("out of space in tostring on %s", s);
+ return(p); /* newly allocated copy of s */
+}
+
+char *tostringN(const char *s, size_t n) /* make a copy of string s in a newly allocated n-byte buffer */
+{
char *p;
- p = (char *) malloc(strlen(s)+1);
+ p = (char *) malloc(n); /* n is not checked: the strcpy below writes strlen(s)+1 bytes, so callers must pass n > strlen(s) */
if (p == NULL)
FATAL("out of space in tostring on %s", s);
strcpy(p, s);
return(p);
}
+Cell *catstr(Cell *a, Cell *b) /* concatenate string values of a and b; result is a CON|STR cell in symtab */
+{
+ Cell *c;
+ char *p;
+ char *sa = getsval(a);
+ char *sb = getsval(b);
+ size_t l = strlen(sa) + strlen(sb) + 1; /* both lengths plus the terminating NUL */
+ p = (char *) malloc(l);
+ if (p == NULL)
+ FATAL("out of space concatenating %s and %s", sa, sb);
+ snprintf(p, l, "%s%s", sa, sb); /* p = sa immediately followed by sb */
+
+ l++; // add room for ' '
+ char *newbuf = (char *) malloc(l);
+ if (newbuf == NULL)
+ FATAL("out of space concatenating %s and %s", sa, sb);
+ // See string() in lex.c; a string "xx" is stored in the symbol
+ // table as "xx ".
+ snprintf(newbuf, l, "%s ", p); /* newbuf = p plus one trailing blank; used as the cell's name */
+ c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab); /* name newbuf, string value p; an existing cell with that name is returned as-is */
+ free(p); /* NOTE(review): assumes setsymtab stored its own copy of the value — confirm */
+ free(newbuf); /* setsymtab copies the name via tostring(), so this buffer is no longer needed */
+ return c;
+}
+
char *qstring(const char *is, int delim) /* collect string up to next delim */
{
const char *os = is;
int c, n;
- uschar *s = (uschar *) is;
+ const uschar *s = (const uschar *) is;
uschar *buf, *bp;
if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
@@ -533,7 +593,7 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */
if (c == 0) { /* \ at end */
*bp++ = '\\';
break; /* for loop */
- }
+ }
switch (c) {
case '\\': *bp++ = '\\'; break;
case 'n': *bp++ = '\n'; break;
@@ -541,6 +601,8 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */
case 'b': *bp++ = '\b'; break;
case 'f': *bp++ = '\f'; break;
case 'r': *bp++ = '\r'; break;
+ case 'v': *bp++ = '\v'; break;
+ case 'a': *bp++ = '\a'; break;
default:
if (!isdigit(c)) {
*bp++ = c;