aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--COPYING5
-rw-r--r--Changes66
-rw-r--r--MANIFEST4
-rw-r--r--Makefile.in29
-rw-r--r--README4
-rw-r--r--configure.ac (renamed from configure.in)10
-rwxr-xr-xdoc/expat.pngbin1027 -> 1029 bytes
-rw-r--r--doc/reference.html8
-rw-r--r--doc/xmlwf.1214
-rw-r--r--doc/xmlwf.xml (renamed from doc/xmlwf.sgml)36
-rw-r--r--examples/elements.c9
-rw-r--r--examples/outline.c7
-rw-r--r--expat_config.h.in5
-rw-r--r--lib/expat.h13
-rwxr-xr-xlib/expat_external.h14
-rw-r--r--lib/internal.h22
-rw-r--r--lib/xmlparse.c127
-rw-r--r--lib/xmlrole.c224
-rw-r--r--lib/xmltok.c230
-rw-r--r--lib/xmltok.h10
-rw-r--r--lib/xmltok_impl.c226
-rwxr-xr-xtests/benchmark/README.txt2
-rw-r--r--tests/chardata.c8
-rwxr-xr-xtests/minicheck.c7
-rwxr-xr-xtests/minicheck.h15
-rw-r--r--tests/runtests.c479
-rwxr-xr-xtests/xmltest.sh8
-rwxr-xr-xxmlwf/codepage.c5
-rwxr-xr-xxmlwf/readfilemap.c9
-rwxr-xr-xxmlwf/unixfilemap.c4
-rwxr-xr-xxmlwf/xmlfile.c9
-rwxr-xr-xxmlwf/xmlwf.c48
32 files changed, 1162 insertions, 695 deletions
diff --git a/COPYING b/COPYING
index dcb450642968..092c83baee13 100644
--- a/COPYING
+++ b/COPYING
@@ -1,6 +1,5 @@
-Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
- and Clark Cooper
-Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
+Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
+Copyright (c) 2001-2016 Expat maintainers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/Changes b/Changes
index 08897b9f9ed1..583c86857629 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,67 @@
+Release 2.2.0 Tue June 21 2016
+ Security fixes:
+ #537 CVE-2016-0718 -- Fix crash on malformed input
+ CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 /
+ CVE-2015-2716 introduced with Expat 2.1.1
+ #499 CVE-2016-5300 -- Use more entropy for hash initialization
+ than the original fix to CVE-2012-0876
+ #519 CVE-2012-6702 -- Resolve troublesome internal call to srand
+ that was introduced with Expat 2.1.0
+ when addressing CVE-2012-0876 (issue #496)
+
+ Bug fixes:
+ Fix uninitialized reads of size 1
+ (e.g. in little2_updatePosition)
+ Fix detection of UTF-8 character boundaries
+
+ Other changes:
+ #532 Fix compilation for Visual Studio 2010 (keyword "C99")
+ Autotools: Resolve use of "$<" to better support bmake
+ Autotools: Add QA script "qa.sh" (and make target "qa")
+ Autotools: Respect CXXFLAGS if given
+ Autotools: Fix "make run-xmltest"
+ Autotools: Have "make run-xmltest" check for expected output
+ p90 CMake: Fix static build (BUILD_shared=OFF) on Windows
+ #536 CMake: Add soversion, support -DNO_SONAME=yes to bypass
+ #323 CMake: Add suffix "d" to differentiate debug from release
+ CMake: Define WIN32 with CMake on Windows
+ Annotate memory allocators for GCC
+ Address all currently known compile warnings
+ Make sure that API symbols remain visible despite
+ -fvisibility=hidden
+ Remove executable flag from source files
+ Resolve COMPILED_FROM_DSP in favor of WIN32
+
+ Special thanks to:
+ Björn Lindahl
+ Christian Heimes
+ Cristian Rodríguez
+ Daniel Krügler
+ Gustavo Grieco
+ Karl Waclawek
+ László Böszörményi
+ Marco Grassi
+ Pascal Cuoq
+ Sergei Nikulov
+ Thomas Beutlich
+ Warren Young
+ Yann Droneaud
+
+Release 2.1.1 Sat March 12 2016
+ Security fixes:
+ #582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer
+
+ Bug fixes:
+ #502: Fix potential null pointer dereference
+ #520: Symbol XML_SetHashSalt was not exported
+ Output of "xmlwf -h" was incomplete
+
+ Other changes:
+ #503: Document behavior of calling XML_SetHashSalt with salt 0
+ Minor improvements to man page xmlwf(1)
+ Improvements to the experimental CMake build system
+ libtool now invoked with --verbose
+
Release 2.1.0 Sat March 24 2012
- Bug Fixes:
#1742315: Harmful XML_ParserCreateNS suggestion.
@@ -23,7 +87,7 @@ Release 2.1.0 Sat March 24 2012
#3312568: CMake support.
#3446384: Report byte offsets for attr names and values.
- New Features / API changes:
- Added new API member XML_SetHashSalt() that allows setting an intial
+ Added new API member XML_SetHashSalt() that allows setting an initial
value (salt) for hash calculations. This is part of the fix for
bug #3496608 to randomize hash parameters.
When compiled with XML_ATTR_INFO defined, adds new API member
diff --git a/MANIFEST b/MANIFEST
index 7a020dc05b01..afb9acacdf06 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -44,7 +44,7 @@ doc/reference.html
doc/style.css
doc/valid-xhtml10.png
doc/xmlwf.1
-doc/xmlwf.sgml
+doc/xmlwf.xml
CMakeLists.txt
CMake.README
COPYING
@@ -54,7 +54,7 @@ MANIFEST
Makefile.in
README
configure
-configure.in
+configure.ac
expat_config.h.in
expat_config.h.cmake
expat.pc.in
diff --git a/Makefile.in b/Makefile.in
index 9c0f5d49f0f4..3310aa9aae92 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -42,7 +42,7 @@ INSTALL_DATA = @INSTALL_DATA@
mkinstalldirs = $(SHELL) $(top_srcdir)/conftools/mkinstalldirs
MANFILE = $(srcdir)/doc/xmlwf.1
-APIHEADER = $(srcdir)/lib/expat.h $(srcdir)/lib/expat_external.h
+APIHEADER = $(srcdir)/lib/expat.h $(srcdir)/lib/expat_external.h expat_config.h
LIBRARY = libexpat.la
DESTDIR = $(INSTALL_ROOT)
@@ -51,7 +51,7 @@ default: buildlib xmlwf/xmlwf@EXEEXT@
buildlib: $(LIBRARY) expat.pc
-all: $(LIBRARY) expat.pc xmlwf/xmlwf@EXEEXT@ examples/elements examples/outline
+all: $(LIBRARY) expat.pc xmlwf/xmlwf@EXEEXT@ examples/elements examples/outline $(MANFILE)
clean:
cd lib && rm -f $(LIBRARY) *.@OBJEXT@ *.lo && rm -rf .libs _libs
@@ -77,7 +77,10 @@ check: tests/runtests tests/runtestspp
tests/runtests
tests/runtestspp
-install: xmlwf/xmlwf@EXEEXT@ installlib
+$(MANFILE):
+ $(MAKE) -C doc xmlwf.1
+
+install: xmlwf/xmlwf@EXEEXT@ installlib $(MANFILE)
$(mkinstalldirs) $(DESTDIR)$(bindir) $(DESTDIR)$(man1dir)
$(LIBTOOL) --mode=install $(INSTALL_PROGRAM) xmlwf/xmlwf@EXEEXT@ $(DESTDIR)$(bindir)/xmlwf
$(INSTALL_DATA) $(MANFILE) $(DESTDIR)$(man1dir)
@@ -116,7 +119,7 @@ CXXFLAGS = @CXXFLAGS@
VSNFLAG = -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@
### autoconf this?
-LTFLAGS = --silent
+LTFLAGS = --verbose
COMPILE = $(CC) $(INCLUDES) $(CFLAGS) $(DEFS) $(CPPFLAGS)
CXXCOMPILE = $(CXX) $(INCLUDES) $(CXXFLAGS) $(DEFS) $(CPPFLAGS)
@@ -154,11 +157,11 @@ xmlwf/xmlwf@EXEEXT@: $(XMLWF_OBJS) $(LIBRARY)
examples/elements.@OBJEXT@: examples/elements.c
examples/elements: examples/elements.@OBJEXT@ $(LIBRARY)
- $(LINK_EXE) $< $(LIBRARY)
+ $(LINK_EXE) examples/elements.@OBJEXT@ $(LIBRARY)
examples/outline.@OBJEXT@: examples/outline.c
examples/outline: examples/outline.@OBJEXT@ $(LIBRARY)
- $(LINK_EXE) $< $(LIBRARY)
+ $(LINK_EXE) examples/outline.@OBJEXT@ $(LIBRARY)
tests/chardata.@OBJEXT@: tests/chardata.c tests/chardata.h
tests/minicheck.@OBJEXT@: tests/minicheck.c tests/minicheck.h
@@ -180,11 +183,19 @@ tests/xmlts.zip:
wget --output-document=tests/xmlts.zip \
http://www.w3.org/XML/Test/xmlts20080827.zip
-tests/XML-Test-Suite: tests/xmlts.zip
+tests/xmlconf: tests/xmlts.zip
cd tests && unzip -q xmlts.zip
-run-xmltest: xmlwf/xmlwf@EXEEXT@ tests/XML-Test-Suite
- tests/xmltest.sh
+run-xmltest: xmlwf/xmlwf@EXEEXT@ tests/xmlconf
+ tests/xmltest.sh 2>&1 | tee tests/xmltest.log
+ diff -u tests/xmltest.log.expected tests/xmltest.log
+
+.PHONY: qa
+qa:
+ ./qa.sh address
+ ./qa.sh memory
+ ./qa.sh undefined
+ ./qa.sh coverage
.SUFFIXES: .c .cpp .lo .@OBJEXT@
diff --git a/README b/README
index 1f88467d1b04..a7d28450d54f 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
- Expat, Release 2.1.0
+ Expat, Release 2.2.0
This is Expat, a C library for parsing XML, written by James Clark.
Expat is a stream-oriented XML parser. This means that you register
@@ -114,7 +114,7 @@ Note for Solaris users: The "ar" command is usually located in
"/usr/ccs/bin", which is not in the default PATH. You will need to
add this to your path for the "make" command, and probably also switch
to GNU make (the "make" found in /usr/ccs/bin does not seem to work
-properly -- appearantly it does not understand .PHONY directives). If
+properly -- apparently it does not understand .PHONY directives). If
you're using ksh or bash, use this command to build:
PATH=/usr/ccs/bin:$PATH make
diff --git a/configure.in b/configure.ac
index 7e968c46de3a..a06c3e49fc25 100644
--- a/configure.in
+++ b/configure.ac
@@ -25,7 +25,7 @@ dnl test. I believe this test will work, but I don't have a place with non-
dnl GNU M4 to test it right now.
define([expat_version], ifdef([__gnu__],
[esyscmd(conftools/get-version.sh lib/expat.h)],
- [2.1.x]))
+ [2.2.x]))
AC_INIT(expat, expat_version, expat-bugs@libexpat.org)
undefine([expat_version])
@@ -45,9 +45,9 @@ dnl
dnl If the API changes incompatibly set LIBAGE back to 0
dnl
-LIBCURRENT=7
-LIBREVISION=0
-LIBAGE=6
+LIBCURRENT=7 # sync
+LIBREVISION=2 # with
+LIBAGE=6 # CMakeLists.txt!
AC_CONFIG_HEADER(expat_config.h)
@@ -77,7 +77,9 @@ if test "$GCC" = yes ; then
AC_TRY_LINK( , ,
AC_MSG_RESULT(yes),
AC_MSG_RESULT(no); CFLAGS="$OLDCFLAGS")
+ if test "x$CXXFLAGS" = x ; then
CXXFLAGS=`echo "$CFLAGS" | sed 's/ -Wmissing-prototypes -Wstrict-prototypes//'`
+ fi
fi
dnl Checks for header files.
diff --git a/doc/expat.png b/doc/expat.png
index 5bc0726cfd85..3d88eac4a133 100755
--- a/doc/expat.png
+++ b/doc/expat.png
Binary files differ
diff --git a/doc/reference.html b/doc/reference.html
index 8811a3397c73..a4ab40530aa4 100644
--- a/doc/reference.html
+++ b/doc/reference.html
@@ -2151,8 +2151,12 @@ Helps in preventing DoS attacks based on predicting hash
function behavior. In order to have an effect this must be called
before parsing has started. Returns 1 if successful, 0 when called
after <code>XML_Parse</code> or <code>XML_ParseBuffer</code>.
-<p><b>Note:</b> This call is optional, as the parser will auto-generate a new
-random salt value if no value has been set at the start of parsing.</p>
+<p><b>Note:</b>This call is optional, as the parser will auto-generate
+a new random salt value if no value has been set at the start of parsing.
+<p><b>Note:</b>One should not call <code>XML_SetHashSalt</code> with a
+hash salt value of 0, as this value is used as sentinel value to indicate
+that <code>XML_SetHashSalt</code> has <b>not</b> been called. Consequently
+such a call will have no effect, even if it returns 1.</p>
</div>
<pre class="fcndec" id="XML_UseForeignDTD">
diff --git a/doc/xmlwf.1 b/doc/xmlwf.1
index 174719a709ea..06bb84cf3d79 100644
--- a/doc/xmlwf.1
+++ b/doc/xmlwf.1
@@ -1,33 +1,40 @@
-.\" This manpage has been automatically generated by docbook2man
-.\" from a DocBook document. This tool can be found at:
-.\" <http://shell.ipoline.com/~elmert/comp/docbook2X/>
-.\" Please send any bug reports, improvements, comments, patches,
-.\" etc. to Steve Cheng <steve@ggi-project.org>.
-.TH "XMLWF" "1" "24 January 2003" "" ""
+'\" -*- coding: us-ascii -*-
+.if \n(.g .ds T< \\FC
+.if \n(.g .ds T> \\F[\n[.fam]]
+.de URL
+\\$2 \(la\\$1\(ra\\$3
+..
+.if \n(.g .mso www.tmac
+.TH XMLWF 1 "March 11, 2016" "" ""
.SH NAME
xmlwf \- Determines if an XML document is well-formed
.SH SYNOPSIS
-
-\fBxmlwf\fR [ \fB-s\fR] [ \fB-n\fR] [ \fB-p\fR] [ \fB-x\fR] [ \fB-e \fIencoding\fB\fR] [ \fB-w\fR] [ \fB-d \fIoutput-dir\fB\fR] [ \fB-c\fR] [ \fB-m\fR] [ \fB-r\fR] [ \fB-t\fR] [ \fB-v\fR] [ \fBfile ...\fR]
-
-.SH "DESCRIPTION"
-.PP
+'nh
+.fi
+.ad l
+\fBxmlwf\fR \kx
+.if (\nx>(\n(.l/2)) .nr x (\n(.l/5)
+'in \n(.iu+\nxu
+[\fB-s\fR] [\fB-n\fR] [\fB-p\fR] [\fB-x\fR] [\fB-e \fIencoding\fB\fR] [\fB-w\fR] [\fB-d \fIoutput-dir\fB\fR] [\fB-c\fR] [\fB-m\fR] [\fB-r\fR] [\fB-t\fR] [\fB-v\fR] [file ...]
+'in \n(.iu-\nxu
+.ad b
+'hy
+.SH DESCRIPTION
\fBxmlwf\fR uses the Expat library to
-determine if an XML document is well-formed. It is
+determine if an XML document is well-formed. It is
non-validating.
.PP
If you do not specify any files on the command-line, and you
have a recent version of \fBxmlwf\fR, the
input file will be read from standard input.
.SH "WELL-FORMED DOCUMENTS"
-.PP
A well-formed document must adhere to the
following rules:
.TP 0.2i
\(bu
-The file begins with an XML declaration. For instance,
-<?xml version="1.0" standalone="yes"?>.
-\fBNOTE:\fR
+The file begins with an XML declaration. For instance,
+\*(T<<?xml version="1.0" standalone="yes"?>\*(T>.
+\fINOTE:\fR
\fBxmlwf\fR does not currently
check for a valid XML declaration.
.TP 0.2i
@@ -36,8 +43,8 @@ Every start tag is either empty (<tag/>)
or has a corresponding end tag.
.TP 0.2i
\(bu
-There is exactly one root element. This element must contain
-all other elements in the document. Only comments, white
+There is exactly one root element. This element must contain
+all other elements in the document. Only comments, white
space, and processing instructions may come after the close
of the root element.
.TP 0.2i
@@ -49,39 +56,38 @@ All attribute values are enclosed in quotes (either single
or double).
.PP
If the document has a DTD, and it strictly complies with that
-DTD, then the document is also considered \fBvalid\fR.
+DTD, then the document is also considered \fIvalid\fR.
\fBxmlwf\fR is a non-validating parser --
-it does not check the DTD. However, it does support
-external entities (see the \fB-x\fR option).
-.SH "OPTIONS"
-.PP
+it does not check the DTD. However, it does support
+external entities (see the \*(T<\fB\-x\fR\*(T> option).
+.SH OPTIONS
When an option includes an argument, you may specify the argument either
-separately ("\fB-d\fR output") or concatenated with the
-option ("\fB-d\fRoutput"). \fBxmlwf\fR
+separately ("\*(T<\fB\-d\fR\*(T> output") or concatenated with the
+option ("\*(T<\fB\-d\fR\*(T>output"). \fBxmlwf\fR
supports both.
-.TP
-\fB-c\fR
+.TP
+\*(T<\fB\-c\fR\*(T>
If the input file is well-formed and \fBxmlwf\fR
doesn't encounter any errors, the input file is simply copied to
the output directory unchanged.
-This implies no namespaces (turns off \fB-n\fR) and
-requires \fB-d\fR to specify an output file.
-.TP
-\fB-d output-dir\fR
+This implies no namespaces (turns off \*(T<\fB\-n\fR\*(T>) and
+requires \*(T<\fB\-d\fR\*(T> to specify an output file.
+.TP
+\*(T<\fB\-d output\-dir\fR\*(T>
Specifies a directory to contain transformed
representations of the input files.
-By default, \fB-d\fR outputs a canonical representation
+By default, \*(T<\fB\-d\fR\*(T> outputs a canonical representation
(described below).
-You can select different output formats using \fB-c\fR
-and \fB-m\fR.
+You can select different output formats using \*(T<\fB\-c\fR\*(T>
+and \*(T<\fB\-m\fR\*(T>.
The output filenames will
be exactly the same as the input filenames or "STDIN" if the input is
-coming from standard input. Therefore, you must be careful that the
+coming from standard input. Therefore, you must be careful that the
output file does not go into the same directory as the input
-file. Otherwise, \fBxmlwf\fR will delete the
+file. Otherwise, \fBxmlwf\fR will delete the
input file before it generates the output file (just like running
-cat < file > file in most shells).
+\*(T<cat < file > file\*(T> in most shells).
Two structurally equivalent XML documents have a byte-for-byte
identical canonical XML representation.
@@ -89,39 +95,39 @@ Note that ignorable white space is considered significant and
is treated equivalently to data.
More on canonical XML can be found at
http://www.jclark.com/xml/canonxml.html .
-.TP
-\fB-e encoding\fR
+.TP
+\*(T<\fB\-e encoding\fR\*(T>
Specifies the character encoding for the document, overriding
-any document encoding declaration. \fBxmlwf\fR
+any document encoding declaration. \fBxmlwf\fR
supports four built-in encodings:
-US-ASCII,
-UTF-8,
-UTF-16, and
-ISO-8859-1.
-Also see the \fB-w\fR option.
-.TP
-\fB-m\fR
+\*(T<US\-ASCII\*(T>,
+\*(T<UTF\-8\*(T>,
+\*(T<UTF\-16\*(T>, and
+\*(T<ISO\-8859\-1\*(T>.
+Also see the \*(T<\fB\-w\fR\*(T> option.
+.TP
+\*(T<\fB\-m\fR\*(T>
Outputs some strange sort of XML file that completely
describes the input file, including character positions.
-Requires \fB-d\fR to specify an output file.
-.TP
-\fB-n\fR
-Turns on namespace processing. (describe namespaces)
-\fB-c\fR disables namespaces.
-.TP
-\fB-p\fR
+Requires \*(T<\fB\-d\fR\*(T> to specify an output file.
+.TP
+\*(T<\fB\-n\fR\*(T>
+Turns on namespace processing. (describe namespaces)
+\*(T<\fB\-c\fR\*(T> disables namespaces.
+.TP
+\*(T<\fB\-p\fR\*(T>
Tells xmlwf to process external DTDs and parameter
entities.
Normally \fBxmlwf\fR never parses parameter
-entities. \fB-p\fR tells it to always parse them.
-\fB-p\fR implies \fB-x\fR.
-.TP
-\fB-r\fR
+entities. \*(T<\fB\-p\fR\*(T> tells it to always parse them.
+\*(T<\fB\-p\fR\*(T> implies \*(T<\fB\-x\fR\*(T>.
+.TP
+\*(T<\fB\-r\fR\*(T>
Normally \fBxmlwf\fR memory-maps the XML file
before parsing; this can result in faster parsing on many
platforms.
-\fB-r\fR turns off memory-mapping and uses normal file
+\*(T<\fB\-r\fR\*(T> turns off memory-mapping and uses normal file
IO calls instead.
Of course, memory-mapping is automatically turned off
when reading from standard input.
@@ -131,34 +137,33 @@ substantially higher memory usage for
\fBxmlwf\fR, but this appears to be a matter of
the operating system reporting memory in a strange way; there is
not a leak in \fBxmlwf\fR.
-.TP
-\fB-s\fR
+.TP
+\*(T<\fB\-s\fR\*(T>
Prints an error if the document is not standalone.
A document is standalone if it has no external subset and no
references to parameter entities.
-.TP
-\fB-t\fR
-Turns on timings. This tells Expat to parse the entire file,
+.TP
+\*(T<\fB\-t\fR\*(T>
+Turns on timings. This tells Expat to parse the entire file,
but not perform any processing.
This gives a fairly accurate idea of the raw speed of Expat itself
without client overhead.
-\fB-t\fR turns off most of the output options
-(\fB-d\fR, \fB-m\fR, \fB-c\fR,
-\&...).
-.TP
-\fB-v\fR
+\*(T<\fB\-t\fR\*(T> turns off most of the output options
+(\*(T<\fB\-d\fR\*(T>, \*(T<\fB\-m\fR\*(T>, \*(T<\fB\-c\fR\*(T>, ...).
+.TP
+\*(T<\fB\-v\fR\*(T>
Prints the version of the Expat library being used, including some
information on the compile-time configuration of the library, and
then exits.
-.TP
-\fB-w\fR
+.TP
+\*(T<\fB\-w\fR\*(T>
Enables support for Windows code pages.
Normally, \fBxmlwf\fR will throw an error if it
-runs across an encoding that it is not equipped to handle itself. With
-\fB-w\fR, xmlwf will try to use a Windows code
-page. See also \fB-e\fR.
-.TP
-\fB-x\fR
+runs across an encoding that it is not equipped to handle itself. With
+\*(T<\fB\-w\fR\*(T>, xmlwf will try to use a Windows code
+page. See also \*(T<\fB\-e\fR\*(T>.
+.TP
+\*(T<\fB\-x\fR\*(T>
Turns on parsing external entities.
Non-validating parsers are not required to resolve external
@@ -172,80 +177,75 @@ data from outside the XML file currently being parsed.
This is an example of an internal entity:
.nf
+
<!ENTITY vers '1.0.2'>
.fi
And here are some examples of external entities:
.nf
-<!ENTITY header SYSTEM "header-&vers;.xml"> (parsed)
+
+<!ENTITY header SYSTEM "header\-&vers;.xml"> (parsed)
<!ENTITY logo SYSTEM "logo.png" PNG> (unparsed)
.fi
-.TP
-\fB--\fR
+.TP
+\*(T<\fB\-\-\fR\*(T>
(Two hyphens.)
-Terminates the list of options. This is only needed if a filename
-starts with a hyphen. For example:
+Terminates the list of options. This is only needed if a filename
+starts with a hyphen. For example:
.nf
-xmlwf -- -myfile.xml
+
+xmlwf \-\- \-myfile.xml
.fi
will run \fBxmlwf\fR on the file
-\fI-myfile.xml\fR.
+\*(T<\fI\-myfile.xml\fR\*(T>.
.PP
Older versions of \fBxmlwf\fR do not support
reading from standard input.
-.SH "OUTPUT"
-.PP
+.SH OUTPUT
If an input file is not well-formed,
\fBxmlwf\fR prints a single line describing
-the problem to standard output. If a file is well formed,
+the problem to standard output. If a file is well formed,
\fBxmlwf\fR outputs nothing.
-Note that the result code is \fBnot\fR set.
-.SH "BUGS"
-.PP
-According to the W3C standard, an XML file without a
-declaration at the beginning is not considered well-formed.
-However, \fBxmlwf\fR allows this to pass.
-.PP
+Note that the result code is \fInot\fR set.
+.SH BUGS
\fBxmlwf\fR returns a 0 - noerr result,
-even if the file is not well-formed. There is no good way for
+even if the file is not well-formed. There is no good way for
a program to use \fBxmlwf\fR to quickly
check a file -- it must parse \fBxmlwf\fR's
standard output.
.PP
The errors should go to standard error, not standard output.
.PP
-There should be a way to get \fB-d\fR to send its
+There should be a way to get \*(T<\fB\-d\fR\*(T> to send its
output to standard output rather than forcing the user to send
it to a file.
.PP
I have no idea why anyone would want to use the
-\fB-d\fR, \fB-c\fR, and
-\fB-m\fR options. If someone could explain it to
+\*(T<\fB\-d\fR\*(T>, \*(T<\fB\-c\fR\*(T>, and
+\*(T<\fB\-m\fR\*(T> options. If someone could explain it to
me, I'd like to add this information to this manpage.
-.SH "ALTERNATIVES"
-.PP
+.SH ALTERNATIVES
Here are some XML validators on the web:
.nf
-http://www.hcrc.ed.ac.uk/~richard/xml-check.html
+
+http://www.hcrc.ed.ac.uk/~richard/xml\-check.html
http://www.stg.brown.edu/service/xmlvalid/
http://www.scripting.com/frontier5/xml/code/xmlValidator.html
http://www.xml.com/pub/a/tools/ruwf/check.html
.fi
.SH "SEE ALSO"
-.PP
-
.nf
+
The Expat home page: http://www.libexpat.org/
-The W3 XML specification: http://www.w3.org/TR/REC-xml
+The W3 XML specification: http://www.w3.org/TR/REC\-xml
.fi
-.SH "AUTHOR"
-.PP
-This manual page was written by Scott Bronson <bronson@rinspin.com> for
-the Debian GNU/Linux system (but may be used by others). Permission is
+.SH AUTHOR
+This manual page was written by Scott Bronson <\*(T<bronson@rinspin.com\*(T>> for
+the Debian GNU/Linux system (but may be used by others). Permission is
granted to copy, distribute and/or modify this document under
the terms of the GNU Free Documentation
License, Version 1.1.
diff --git a/doc/xmlwf.sgml b/doc/xmlwf.xml
index 313cfbcb210d..92ea8b592964 100644
--- a/doc/xmlwf.sgml
+++ b/doc/xmlwf.xml
@@ -1,19 +1,9 @@
-<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
-
-<!-- Process this file with docbook-to-man to generate an nroff manual
- page: `docbook-to-man manpage.sgml > manpage.1'. You may view
- the manual page with: `docbook-to-man manpage.sgml | nroff -man |
- less'. A typical entry in a Makefile or Makefile.am is:
-
-manpage.1: manpage.sgml
- docbook-to-man $< > $@
- -->
-
+<!DOCTYPE refentry [
<!-- Fill in your name for FIRSTNAME and SURNAME. -->
<!ENTITY dhfirstname "<firstname>Scott</firstname>">
<!ENTITY dhsurname "<surname>Bronson</surname>">
<!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>December 5, 2001</date>">
+ <!ENTITY dhdate "<date>March 11, 2016</date>">
<!-- SECTION should be 1-8, maybe w/ subsection other parameters are
allowed: see man(7), man(1). -->
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
@@ -213,7 +203,7 @@ supports both.
<listitem>
<para>
Outputs some strange sort of XML file that completely
- describes the the input file, including character postitions.
+ describes the input file, including character positions.
Requires <option>-d</option> to specify an output file.
</para>
</listitem>
@@ -286,8 +276,7 @@ supports both.
This gives a fairly accurate idea of the raw speed of Expat itself
without client overhead.
<option>-t</option> turns off most of the output options
- (<option>-d</option>, <option>-m</option>, <option>-c</option>,
- ...).
+ (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
</para>
</listitem>
</varlistentry>
@@ -449,20 +438,3 @@ The W3 XML specification: http://www.w3.org/TR/REC-xml
</para>
</refsect1>
</refentry>
-
-<!-- Keep this comment at the end of the file
-Local variables:
-mode: sgml
-sgml-omittag:t
-sgml-shorttag:t
-sgml-minimize-attributes:nil
-sgml-always-quote-attributes:t
-sgml-indent-step:2
-sgml-indent-data:t
-sgml-parent-document:nil
-sgml-default-dtd-file:nil
-sgml-exposed-tags:nil
-sgml-local-catalogs:nil
-sgml-local-ecat-files:nil
-End:
--->
diff --git a/examples/elements.c b/examples/elements.c
index 6b8f85501b6c..0ca1abd98a39 100644
--- a/examples/elements.c
+++ b/examples/elements.c
@@ -27,6 +27,8 @@ startElement(void *userData, const char *name, const char **atts)
{
int i;
int *depthPtr = (int *)userData;
+ (void)atts;
+
for (i = 0; i < *depthPtr; i++)
putchar('\t');
puts(name);
@@ -37,6 +39,8 @@ static void XMLCALL
endElement(void *userData, const char *name)
{
int *depthPtr = (int *)userData;
+ (void)name;
+
*depthPtr -= 1;
}
@@ -47,10 +51,13 @@ main(int argc, char *argv[])
XML_Parser parser = XML_ParserCreate(NULL);
int done;
int depth = 0;
+ (void)argc;
+ (void)argv;
+
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
do {
- int len = (int)fread(buf, 1, sizeof(buf), stdin);
+ size_t len = fread(buf, 1, sizeof(buf), stdin);
done = len < sizeof(buf);
if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
fprintf(stderr,
diff --git a/examples/outline.c b/examples/outline.c
index 3a3c8385512a..d9b091761cd4 100644
--- a/examples/outline.c
+++ b/examples/outline.c
@@ -49,6 +49,7 @@ static void XMLCALL
start(void *data, const char *el, const char **attr)
{
int i;
+ (void)data;
for (i = 0; i < Depth; i++)
printf(" ");
@@ -66,6 +67,9 @@ start(void *data, const char *el, const char **attr)
static void XMLCALL
end(void *data, const char *el)
{
+ (void)data;
+ (void)el;
+
Depth--;
}
@@ -73,6 +77,9 @@ int
main(int argc, char *argv[])
{
XML_Parser p = XML_ParserCreate(NULL);
+ (void)argc;
+ (void)argv;
+
if (! p) {
fprintf(stderr, "Couldn't allocate memory for parser\n");
exit(-1);
diff --git a/expat_config.h.in b/expat_config.h.in
index 8c6e51409e0a..0777a7f9bda1 100644
--- a/expat_config.h.in
+++ b/expat_config.h.in
@@ -1,4 +1,4 @@
-/* expat_config.h.in. Generated from configure.in by autoheader. */
+/* expat_config.h.in. Generated from configure.ac by autoheader. */
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
#undef BYTEORDER
@@ -51,8 +51,7 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
#undef LT_OBJDIR
/* Define to the address where bug reports for this package should be sent. */
diff --git a/lib/expat.h b/lib/expat.h
index 9a21680be468..086e24b39c51 100644
--- a/lib/expat.h
+++ b/lib/expat.h
@@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
- This handler has been superceded by the EntityDeclHandler above.
+ This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA) entity.
@@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
+XML_ATTR_MALLOC
+XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
+XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
@@ -1031,13 +1034,11 @@ XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
-/* Expat follows the GNU/Linux convention of odd number minor version for
- beta/development releases and even number minor version for stable
- releases. Micro is bumped with each release, and set to 0 with each
- change to major or minor version.
+/* Expat follows the semantic versioning convention.
+ See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 1
+#define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 0
#ifdef __cplusplus
diff --git a/lib/expat_external.h b/lib/expat_external.h
index 2c03284ea265..aa08a2f84c07 100755
--- a/lib/expat_external.h
+++ b/lib/expat_external.h
@@ -65,12 +65,26 @@
#endif
#endif /* not defined XML_STATIC */
+#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
+#define XMLIMPORT __attribute__ ((visibility ("default")))
+#endif
/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
#define XMLIMPORT
#endif
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define XML_ATTR_MALLOC __attribute__((__malloc__))
+#else
+#define XML_ATTR_MALLOC
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
+#else
+#define XML_ATTR_ALLOC_SIZE(x)
+#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
diff --git a/lib/internal.h b/lib/internal.h
index dd5454831da2..94cb98e15cae 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -71,3 +71,25 @@
#define inline
#endif
#endif
+
+#ifndef UNUSED_P
+# ifdef __GNUC__
+# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
+# else
+# define UNUSED_P(p) UNUSED_ ## p
+# endif
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index f35aa36ba8a7..b308e67e2d39 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -6,11 +6,18 @@
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
-#include <time.h> /* time() */
+
+#ifdef WIN32
+#define getpid GetCurrentProcessId
+#else
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/types.h> /* getpid() */
+#include <unistd.h> /* getpid() */
+#endif
#define XML_BUILDING_EXPAT 1
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -20,7 +27,7 @@
#include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "ascii.h"
#include "expat.h"
@@ -432,7 +439,7 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end);
-static unsigned long generate_hash_secret_salt(void);
+static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser);
static XML_Parser
@@ -691,11 +698,38 @@ static const XML_Char implicitContext[] = {
};
static unsigned long
-generate_hash_secret_salt(void)
+gather_time_entropy(void)
{
- unsigned int seed = time(NULL) % UINT_MAX;
- srand(seed);
- return rand();
+#ifdef WIN32
+ FILETIME ft;
+ GetSystemTimeAsFileTime(&ft); /* never fails */
+ return ft.dwHighDateTime ^ ft.dwLowDateTime;
+#else
+ struct timeval tv;
+ int gettimeofday_res;
+
+ gettimeofday_res = gettimeofday(&tv, NULL);
+ assert (gettimeofday_res == 0);
+
+ /* Microseconds time is <20 bits entropy */
+ return tv.tv_usec;
+#endif
+}
+
+static unsigned long
+generate_hash_secret_salt(XML_Parser parser)
+{
+ /* Process ID is 0 bits entropy if attacker has local access
+ * XML_Parser address is few bits of entropy if attacker has local access */
+ const unsigned long entropy =
+ gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
+
+ /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
+ if (sizeof(unsigned long) == 4) {
+ return entropy * 2147483647;
+ } else {
+ return entropy * (unsigned long)2305843009213693951;
+ }
}
static XML_Bool /* only valid for root parser */
@@ -703,7 +737,7 @@ startParsing(XML_Parser parser)
{
/* hash functions must be initialized before setContext() is called */
if (hash_secret_salt == 0)
- hash_secret_salt = generate_hash_secret_salt();
+ hash_secret_salt = generate_hash_secret_salt(parser);
if (ns) {
/* implicit context only set for root parser, since child
parsers (i.e. external entity parsers) will inherit it
@@ -1550,7 +1584,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
else if (bufferPtr == bufferEnd) {
const char *end;
int nLeftOver;
- enum XML_Error result;
+ enum XML_Status result;
parseEndByteIndex += len;
positionPtr = s;
ps_finalBuffer = (XML_Bool)isFinal;
@@ -1678,6 +1712,10 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
void * XMLCALL
XML_GetBuffer(XML_Parser parser, int len)
{
+ if (len < 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
switch (ps_parsing) {
case XML_SUSPENDED:
errorCode = XML_ERROR_SUSPENDED;
@@ -1689,11 +1727,17 @@ XML_GetBuffer(XML_Parser parser, int len)
}
if (len > bufferLim - bufferEnd) {
- /* FIXME avoid integer overflow */
- int neededSize = len + (int)(bufferEnd - bufferPtr);
#ifdef XML_CONTEXT_BYTES
- int keep = (int)(bufferPtr - buffer);
-
+ int keep;
+#endif /* defined XML_CONTEXT_BYTES */
+ /* Do not invoke signed arithmetic overflow: */
+ int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
+ if (neededSize < 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
+#ifdef XML_CONTEXT_BYTES
+ keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
neededSize += keep;
@@ -1718,8 +1762,13 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
- bufferSize *= 2;
- } while (bufferSize < neededSize);
+ /* Do not invoke signed arithmetic overflow: */
+ bufferSize = (int) (2U * (unsigned) bufferSize);
+ } while (bufferSize < neededSize && bufferSize > 0);
+ if (bufferSize <= 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
newBuf = (char *)MALLOC(bufferSize);
if (newBuf == 0) {
errorCode = XML_ERROR_NO_MEMORY;
@@ -1841,7 +1890,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)
{
if (eventPtr)
- return parseEndByteIndex - (parseEndPtr - eventPtr);
+ return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1;
}
@@ -2415,11 +2464,11 @@ doContent(XML_Parser parser,
for (;;) {
int bufSize;
int convLen;
- XmlConvert(enc,
+ const enum XML_Convert_Result convert_res = XmlConvert(enc,
&fromPtr, rawNameEnd,
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
convLen = (int)(toPtr - (XML_Char *)tag->buf);
- if (fromPtr == rawNameEnd) {
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
tag->name.strLen = convLen;
break;
}
@@ -2640,11 +2689,11 @@ doContent(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -2911,6 +2960,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
unsigned long uriHash = hash_secret_salt;
((XML_Char *)s)[-1] = 0; /* clear flag */
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
+ if (!id || !id->prefix)
+ return XML_ERROR_NO_MEMORY;
b = id->prefix->binding;
if (!b)
return XML_ERROR_UNBOUND_PREFIX;
@@ -3248,11 +3299,11 @@ doCdataSection(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = next;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -4911,9 +4962,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,
- const char *s,
- const char *end,
- const char **nextPtr)
+ const char *UNUSED_P(s),
+ const char *UNUSED_P(end),
+ const char **UNUSED_P(nextPtr))
{
return errorCode;
}
@@ -5329,6 +5380,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end)
{
if (MUST_CONVERT(enc, s)) {
+ enum XML_Convert_Result convert_res;
const char **eventPP;
const char **eventEndPP;
if (enc == encoding) {
@@ -5341,11 +5393,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
}
do {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
+ convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
*eventPP = s;
- } while (s != end);
+ } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
}
else
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
@@ -5475,6 +5527,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
return NULL;
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
sizeof(PREFIX));
+ if (!id->prefix)
+ return NULL;
if (id->prefix->name == poolStart(&dtd->pool))
poolFinish(&dtd->pool);
else
@@ -6148,8 +6202,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
if (!pool->ptr && !poolGrow(pool))
return NULL;
for (;;) {
- XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
- if (ptr == end)
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
if (!poolGrow(pool))
return NULL;
@@ -6233,8 +6287,13 @@ poolGrow(STRING_POOL *pool)
}
}
if (pool->blocks && pool->start == pool->blocks->s) {
- int blockSize = (int)(pool->end - pool->start)*2;
- BLOCK *temp = (BLOCK *)
+ BLOCK *temp;
+ int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
+ temp = (BLOCK *)
pool->mem->realloc_fcn(pool->blocks,
(offsetof(BLOCK, s)
+ blockSize * sizeof(XML_Char)));
@@ -6249,6 +6308,10 @@ poolGrow(STRING_POOL *pool)
else {
BLOCK *tem;
int blockSize = (int)(pool->end - pool->start);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE;
else
diff --git a/lib/xmlrole.c b/lib/xmlrole.c
index 44772e21dd33..fcd0dc6948f4 100644
--- a/lib/xmlrole.c
+++ b/lib/xmlrole.c
@@ -4,7 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "expat_external.h"
#include "internal.h"
@@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
static int PTRCALL
prolog2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
static int PTRCALL
doctype0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
static int PTRCALL
doctype2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
static int PTRCALL
doctype3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
static int PTRCALL
doctype4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
static int PTRCALL
doctype5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
static int PTRCALL
entity0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
static int PTRCALL
entity1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
static int PTRCALL
entity3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
static int PTRCALL
entity4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
static int PTRCALL
entity6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
static int PTRCALL
entity8(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
static int PTRCALL
entity9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
static int PTRCALL
entity10(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
static int PTRCALL
notation2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
static int PTRCALL
notation3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
static int PTRCALL
notation4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
static int PTRCALL
attlist0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
static int PTRCALL
attlist1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
static int PTRCALL
attlist3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
static int PTRCALL
attlist4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
static int PTRCALL
attlist5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
static int PTRCALL
attlist6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
static int PTRCALL
attlist7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
static int PTRCALL
attlist9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
static int PTRCALL
element0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
static int PTRCALL
element3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
static int PTRCALL
element4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
static int PTRCALL
element5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
static int PTRCALL
element6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
static int PTRCALL
element7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
static int PTRCALL
condSect1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
static int PTRCALL
condSect2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
static int PTRCALL
declClose(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
}
static int PTRCALL
-error(PROLOG_STATE *state,
- int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+error(PROLOG_STATE *UNUSED_P(state),
+ int UNUSED_P(tok),
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
return XML_ROLE_NONE;
}
diff --git a/lib/xmltok.c b/lib/xmltok.c
index bf09dfc72b9f..a29d9e2f8ac3 100644
--- a/lib/xmltok.c
+++ b/lib/xmltok.c
@@ -4,7 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "expat_external.h"
#include "internal.h"
@@ -46,7 +46,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -56,7 +56,7 @@
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
- & (1 << (((byte)[1]) & 0x1F)))
+ & (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
@@ -69,7 +69,7 @@
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
- & (1 << (((byte)[2]) & 0x1F)))
+ & (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
@@ -122,19 +122,19 @@
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL
-isNever(const ENCODING *enc, const char *p)
+isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{
return 0;
}
static int PTRFASTCALL
-utf8_isName2(const ENCODING *enc, const char *p)
+utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isName3(const ENCODING *enc, const char *p)
+utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}
@@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
#define utf8_isName4 isNever
static int PTRFASTCALL
-utf8_isNmstrt2(const ENCODING *enc, const char *p)
+utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isNmstrt3(const ENCODING *enc, const char *p)
+utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}
@@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define utf8_isNmstrt4 isNever
static int PTRFASTCALL
-utf8_isInvalid2(const ENCODING *enc, const char *p)
+utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID2((const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isInvalid3(const ENCODING *enc, const char *p)
+utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID3((const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isInvalid4(const ENCODING *enc, const char *p)
+utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID4((const unsigned char *)p);
}
@@ -222,6 +222,17 @@ struct normal_encoding {
E ## isInvalid3, \
E ## isInvalid4
+#define NULL_VTABLE \
+ /* isName2 */ NULL, \
+ /* isName3 */ NULL, \
+ /* isName4 */ NULL, \
+ /* isNmstrt2 */ NULL, \
+ /* isNmstrt3 */ NULL, \
+ /* isNmstrt4 */ NULL, \
+ /* isInvalid2 */ NULL, \
+ /* isInvalid3 */ NULL, \
+ /* isInvalid4 */ NULL
+
static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h"
@@ -318,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0
};
-static void PTRCALL
-utf8_toUtf8(const ENCODING *enc,
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+{
+ const char * fromLim = *fromLimRef;
+ size_t walked = 0;
+ for (; fromLim > from; fromLim--, walked++) {
+ const unsigned char prev = (unsigned char)fromLim[-1];
+ if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
+ if (walked + 1 >= 4) {
+ fromLim += 4 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
+ if (walked + 1 >= 3) {
+ fromLim += 3 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
+ if (walked + 1 >= 2) {
+ fromLim += 2 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
+ break;
+ }
+ }
+ *fromLimRef = fromLim;
+}
+
+static enum XML_Convert_Result PTRCALL
+utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
char *to;
const char *from;
if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */
- for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
- if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
- break;
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
+ fromLim = *fromP + (toLim - *toP);
+ align_limit_to_full_utf8_characters(*fromP, &fromLim);
}
- for (to = *toP, from = *fromP; from != fromLim; from++, to++)
+ for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from;
*fromP = from;
*toP = to;
+
+ if ((to == toLim) && (from < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return res;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP;
const char *from = *fromP;
- while (from != fromLim && to != toLim) {
+ while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2:
+ if (fromLim - from < 2) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ break;
+ }
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2;
break;
case BT_LEAD3:
+ if (fromLim - from < 3) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ break;
+ }
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3;
@@ -358,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD4:
{
unsigned long n;
- if (to + 1 == toLim)
+ if (toLim - to < 2) {
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after;
+ }
+ if (fromLim - from < 4) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
@@ -377,6 +444,7 @@ utf8_toUtf16(const ENCODING *enc,
after:
*fromP = from;
*toP = to;
+ return res;
}
#ifdef XML_NS
@@ -425,38 +493,43 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
-static void PTRCALL
-latin1_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
for (;;) {
unsigned char c;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP;
if (c & 0x80) {
if (toLim - *toP < 2)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++;
}
else {
if (*toP == toLim)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++;
}
}
}
-static void PTRCALL
-latin1_toUtf16(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -467,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -480,16 +553,21 @@ static const struct normal_encoding latin1_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
-static void PTRCALL
-ascii_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -500,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = {
#include "asciitab.h"
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -513,7 +591,7 @@ static const struct normal_encoding ascii_encoding = {
#undef BT_COLON
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
static int PTRFASTCALL
@@ -536,13 +614,14 @@ unicode_byte_type(char hi, char lo)
}
#define DEFINE_UTF16_TO_UTF8(E) \
-static void PTRCALL \
-E ## toUtf8(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
- const char *from; \
- for (from = *fromP; from != fromLim; from += 2) { \
+ const char *from = *fromP; \
+ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
+ for (; from < fromLim; from += 2) { \
int plane; \
unsigned char lo2; \
unsigned char lo = GET_LO(from); \
@@ -552,7 +631,7 @@ E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = lo; \
break; \
@@ -562,7 +641,7 @@ E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
@@ -570,7 +649,7 @@ E ## toUtf8(const ENCODING *enc, \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
@@ -580,7 +659,11 @@ E ## toUtf8(const ENCODING *enc, \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ } \
+ if (fromLim - from < 4) { \
+ *fromP = from; \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
@@ -596,20 +679,32 @@ E ## toUtf8(const ENCODING *enc, \
} \
} \
*fromP = from; \
+ if (from < fromLim) \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
+ else \
+ return XML_CONVERT_COMPLETED; \
}
#define DEFINE_UTF16_TO_UTF16(E) \
-static void PTRCALL \
-E ## toUtf16(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
+ fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
- && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
+ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
- for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
+ res = XML_CONVERT_INPUT_INCOMPLETE; \
+ } \
+ for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
+ if ((*toP == toLim) && (*fromP < fromLim)) \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ else \
+ return res; \
}
#define SET2(ptr, ch) \
@@ -726,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -745,7 +840,7 @@ static const struct normal_encoding little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#if BYTEORDER != 4321
@@ -758,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -771,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -867,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -886,7 +981,7 @@ static const struct normal_encoding big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#if BYTEORDER != 1234
@@ -899,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -912,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -938,7 +1033,7 @@ streqci(const char *s1, const char *s2)
}
static void PTRCALL
-initUpdatePosition(const ENCODING *enc, const char *ptr,
+initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos)
{
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
@@ -1288,7 +1383,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
@@ -1299,21 +1394,21 @@ unknown_toUtf8(const ENCODING *enc,
const char *utf8;
int n;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
}
do {
@@ -1322,13 +1417,13 @@ unknown_toUtf8(const ENCODING *enc,
}
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
- while (*fromP != fromLim && *toP != toLim) {
+ while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) {
c = (unsigned short)
@@ -1340,6 +1435,11 @@ unknown_toUtf16(const ENCODING *enc,
(*fromP)++;
*(*toP)++ = c;
}
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
ENCODING *
@@ -1503,7 +1603,7 @@ initScan(const ENCODING * const *encodingTable,
{
const ENCODING **encPtr;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
encPtr = enc->encPtr;
if (ptr + 1 == end) {
diff --git a/lib/xmltok.h b/lib/xmltok.h
index ca867aa6b429..752007e8b9e2 100644
--- a/lib/xmltok.h
+++ b/lib/xmltok.h
@@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char **);
+enum XML_Convert_Result {
+ XML_CONVERT_COMPLETED = 0,
+ XML_CONVERT_INPUT_INCOMPLETE = 1,
+ XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */
+};
+
struct encoding {
SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
@@ -158,12 +164,12 @@ struct encoding {
const char *ptr,
const char *end,
const char **badPtr);
- void (PTRCALL *utf8Convert)(const ENCODING *enc,
+ enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
- void (PTRCALL *utf16Convert)(const ENCODING *enc,
+ enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
index 9c2895b87739..5f779c0571b5 100644
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
@@ -87,27 +87,45 @@
#define PREFIX(ident) ident
#endif
+
+#define HAS_CHARS(enc, ptr, end, count) \
+ (end - ptr >= count * MINBPC(enc))
+
+#define HAS_CHAR(enc, ptr, end) \
+ HAS_CHARS(enc, ptr, end, 1)
+
+#define REQUIRE_CHARS(enc, ptr, end, count) \
+ { \
+ if (! HAS_CHARS(enc, ptr, end, count)) { \
+ return XML_TOK_PARTIAL; \
+ } \
+ }
+
+#define REQUIRE_CHAR(enc, ptr, end) \
+ REQUIRE_CHARS(enc, ptr, end, 1)
+
+
/* ptr points to character following "<!-" */
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -131,8 +149,7 @@ static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT:
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
/* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
+PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr)
{
int upper = 0;
@@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
{
int tok;
const char *target = ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
+PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i;
/* CDATA[ */
- if (end - ptr < 6 * MINBPC(enc))
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 6);
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr;
@@ -305,7 +317,7 @@ static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
switch (BYTE_TYPE(enc, ptr)) {
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
@@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
@@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
ptr += MINBPC(enc);
break;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -383,19 +392,18 @@ static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
break;
@@ -432,7 +440,7 @@ static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -464,7 +472,7 @@ static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) {
@@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
@@ -496,8 +504,7 @@ static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM:
@@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon = 0;
#endif
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
int t;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
@@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#endif
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
@@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* in attribute value */
for (;;) {
int t;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
@@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
@@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon;
#endif
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0;
#endif
/* we have a start-tag */
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
@@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -785,7 +782,7 @@ static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DATA_NEWLINE;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
@@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
break;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
case BT_RSQB:
- if (ptr + MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
- if (ptr + 2*MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
@@ -884,8 +881,7 @@ static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -913,15 +909,14 @@ static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S:
@@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
{
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr);
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
@@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
ptr += MINBPC(enc);
if (t != open)
break;
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) {
@@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
int tok;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LT:
{
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_LF:
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
break;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF:
@@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_BRACKET;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
@@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_PAREN;
case BT_RPAR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_PAREN;
switch (BYTE_TYPE(enc, ptr)) {
case BT_AST:
@@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA:
@@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
switch (tok) {
case XML_TOK_NAME:
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
end = ptr + n;
}
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_LT:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
@@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
}
break;
case BT_RSQB:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
@@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
{
ptr += MINBPC(enc);
end -= MINBPC(enc);
- for (; ptr != end; ptr += MINBPC(enc)) {
+ for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
}
static int PTRFASTCALL
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{
int result = 0;
/* skip &# */
@@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
}
static int PTRCALL
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
+PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end)
{
switch ((end - ptr)/MINBPC(enc)) {
@@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
}
static int PTRCALL
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
+PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2)
{
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
- if (ptr1 == end1)
+ if (end1 - ptr1 < MINBPC(enc))
return 0;
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0;
@@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end,
POSITION *pos)
{
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
case BT_CR:
pos->lineNumber++;
ptr += MINBPC(enc);
- if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
+ if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
pos->columnNumber = (XML_Size)-1;
break;
diff --git a/tests/benchmark/README.txt b/tests/benchmark/README.txt
index 7f9cca037ec4..86414d5815ac 100755
--- a/tests/benchmark/README.txt
+++ b/tests/benchmark/README.txt
@@ -13,4 +13,4 @@ The command line arguments are:
Returns:
The time (in seconds) it takes to parse the test file,
- averaged over the number of iterations. \ No newline at end of file
+ averaged over the number of iterations.@
diff --git a/tests/chardata.c b/tests/chardata.c
index 5fb0299d88e0..012499bb8812 100644
--- a/tests/chardata.c
+++ b/tests/chardata.c
@@ -7,11 +7,7 @@
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
-#ifdef HAVE_CHECK_H
-#include <check.h>
-#else
#include "minicheck.h"
-#endif
#include <assert.h>
#include <stdio.h>
@@ -51,7 +47,7 @@ CharData_AppendString(CharData *storage, const char *s)
if ((len + storage->count) > maxchars) {
len = (maxchars - storage->count);
}
- if (len + storage->count < sizeof(storage->data)) {
+ if (len + storage->count < (int)sizeof(storage->data)) {
memcpy(storage->data + storage->count, s, len);
storage->count += len;
}
@@ -72,7 +68,7 @@ CharData_AppendXMLChars(CharData *storage, const XML_Char *s, int len)
if ((len + storage->count) > maxchars) {
len = (maxchars - storage->count);
}
- if (len + storage->count < sizeof(storage->data)) {
+ if (len + storage->count < (int)sizeof(storage->data)) {
memcpy(storage->data + storage->count, s,
len * sizeof(storage->data[0]));
storage->count += len;
diff --git a/tests/minicheck.c b/tests/minicheck.c
index d2f4295ffb9e..5a1f5ed0e252 100755
--- a/tests/minicheck.c
+++ b/tests/minicheck.c
@@ -10,10 +10,11 @@
#include <setjmp.h>
#include <assert.h>
+#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
Suite *
-suite_create(char *name)
+suite_create(const char *name)
{
Suite *suite = (Suite *) calloc(1, sizeof(Suite));
if (suite != NULL) {
@@ -23,7 +24,7 @@ suite_create(char *name)
}
TCase *
-tcase_create(char *name)
+tcase_create(const char *name)
{
TCase *tc = (TCase *) calloc(1, sizeof(TCase));
if (tc != NULL) {
@@ -156,7 +157,7 @@ srunner_run_all(SRunner *runner, int verbosity)
}
void
-_fail_unless(int condition, const char *file, int line, char *msg)
+_fail_unless(int UNUSED_P(condition), const char *UNUSED_P(file), int UNUSED_P(line), const char *msg)
{
/* Always print the error message so it isn't lost. In this case,
we have a failure, so there's no reason to be quiet about what
diff --git a/tests/minicheck.h b/tests/minicheck.h
index c917c0269717..9b06f51a88a7 100755
--- a/tests/minicheck.h
+++ b/tests/minicheck.h
@@ -26,6 +26,11 @@ extern "C" {
#define __func__ __FUNCTION__
#endif
+/* ISO C90 does not support '__func__' predefined identifier */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 199901)
+# define __func__ "(unknown)"
+#endif
+
#define START_TEST(testname) static void testname(void) { \
_check_set_test_info(__func__, __FILE__, __LINE__); \
{
@@ -48,12 +53,12 @@ struct SRunner {
};
struct Suite {
- char *name;
+ const char *name;
TCase *tests;
};
struct TCase {
- char *name;
+ const char *name;
tcase_setup_function setup;
tcase_teardown_function teardown;
tcase_test_function *tests;
@@ -72,9 +77,9 @@ void _check_set_test_info(char const *function,
* Prototypes for the actual implementation.
*/
-void _fail_unless(int condition, const char *file, int line, char *msg);
-Suite *suite_create(char *name);
-TCase *tcase_create(char *name);
+void _fail_unless(int condition, const char *file, int line, const char *msg);
+Suite *suite_create(const char *name);
+TCase *tcase_create(const char *name);
void suite_add_tcase(Suite *suite, TCase *tc);
void tcase_add_checked_fixture(TCase *,
tcase_setup_function,
diff --git a/tests/runtests.c b/tests/runtests.c
index 614d6b24873f..c0cdea997548 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -13,9 +13,14 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
+#include <stddef.h> /* ptrdiff_t */
+#ifndef __cplusplus
+# include <stdbool.h>
+#endif
#include "expat.h"
#include "chardata.h"
+#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
#if defined(__amigaos__) && defined(__USE_INLINE__)
@@ -66,13 +71,34 @@ _xml_failure(XML_Parser parser, const char *file, int line)
_fail_unless(0, file, line, buffer);
}
+static enum XML_Status
+_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
+{
+ enum XML_Status res = XML_STATUS_ERROR;
+ int offset = 0;
+
+ if (len == 0) {
+ return XML_Parse(parser, s, len, isFinal);
+ }
+
+ for (; offset < len; offset++) {
+ const int innerIsFinal = (offset == len - 1) && isFinal;
+ const char c = s[offset]; /* to help out-of-bounds detection */
+ res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
+ if (res != XML_STATUS_OK) {
+ return res;
+ }
+ }
+ return res;
+}
+
#define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
static void
-_expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
- char *file, int lineno)
+_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
+ const char *file, int lineno)
{
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
/* Hackish use of _fail_unless() macro, but let's us report
the right filename and line number. */
_fail_unless(0, file, lineno, errorMessage);
@@ -89,63 +115,63 @@ _expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
*/
static void XMLCALL
-dummy_start_doctype_handler(void *userData,
- const XML_Char *doctypeName,
- const XML_Char *sysid,
- const XML_Char *pubid,
- int has_internal_subset)
+dummy_start_doctype_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(doctypeName),
+ const XML_Char *UNUSED_P(sysid),
+ const XML_Char *UNUSED_P(pubid),
+ int UNUSED_P(has_internal_subset))
{}
static void XMLCALL
-dummy_end_doctype_handler(void *userData)
+dummy_end_doctype_handler(void *UNUSED_P(userData))
{}
static void XMLCALL
-dummy_entity_decl_handler(void *userData,
- const XML_Char *entityName,
- int is_parameter_entity,
- const XML_Char *value,
- int value_length,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId,
- const XML_Char *notationName)
+dummy_entity_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(entityName),
+ int UNUSED_P(is_parameter_entity),
+ const XML_Char *UNUSED_P(value),
+ int UNUSED_P(value_length),
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId),
+ const XML_Char *UNUSED_P(notationName))
{}
static void XMLCALL
-dummy_notation_decl_handler(void *userData,
- const XML_Char *notationName,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+dummy_notation_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(notationName),
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{}
static void XMLCALL
-dummy_element_decl_handler(void *userData,
- const XML_Char *name,
- XML_Content *model)
+dummy_element_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name),
+ XML_Content *UNUSED_P(model))
{}
static void XMLCALL
-dummy_attlist_decl_handler(void *userData,
- const XML_Char *elname,
- const XML_Char *attname,
- const XML_Char *att_type,
- const XML_Char *dflt,
- int isrequired)
+dummy_attlist_decl_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(elname),
+ const XML_Char *UNUSED_P(attname),
+ const XML_Char *UNUSED_P(att_type),
+ const XML_Char *UNUSED_P(dflt),
+ int UNUSED_P(isrequired))
{}
static void XMLCALL
-dummy_comment_handler(void *userData, const XML_Char *data)
+dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data))
{}
static void XMLCALL
-dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data)
+dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data))
{}
static void XMLCALL
-dummy_start_element(void *userData,
- const XML_Char *name, const XML_Char **atts)
+dummy_start_element(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
{}
@@ -158,7 +184,7 @@ START_TEST(test_nul_byte)
char text[] = "<doc>\0</doc>";
/* test that a NUL byte (in US-ASCII data) is an error */
- if (XML_Parse(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
fail("Parser did not report error on NUL-byte.");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
@@ -178,9 +204,9 @@ END_TEST
START_TEST(test_bom_utf8)
{
/* This test is really just making sure we don't core on a UTF-8 BOM. */
- char *text = "\357\273\277<e/>";
+ const char *text = "\357\273\277<e/>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -189,7 +215,7 @@ START_TEST(test_bom_utf16_be)
{
char text[] = "\376\377\0<\0e\0/\0>";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -198,7 +224,7 @@ START_TEST(test_bom_utf16_le)
{
char text[] = "\377\376<\0e\0/\0>\0";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -210,7 +236,7 @@ accumulate_characters(void *userData, const XML_Char *s, int len)
}
static void XMLCALL
-accumulate_attribute(void *userData, const XML_Char *name,
+accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
const XML_Char **atts)
{
CharData *storage = (CharData *)userData;
@@ -222,7 +248,7 @@ accumulate_attribute(void *userData, const XML_Char *name,
static void
-_run_character_check(XML_Char *text, XML_Char *expected,
+_run_character_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
@@ -230,7 +256,7 @@ _run_character_check(XML_Char *text, XML_Char *expected,
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
@@ -239,7 +265,7 @@ _run_character_check(XML_Char *text, XML_Char *expected,
_run_character_check(text, expected, __FILE__, __LINE__)
static void
-_run_attribute_check(XML_Char *text, XML_Char *expected,
+_run_attribute_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
@@ -247,7 +273,7 @@ _run_attribute_check(XML_Char *text, XML_Char *expected,
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, accumulate_attribute);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
@@ -258,7 +284,7 @@ _run_attribute_check(XML_Char *text, XML_Char *expected,
/* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
run_character_check(text,
@@ -270,7 +296,7 @@ END_TEST
/* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
run_character_check(text,
@@ -280,7 +306,7 @@ END_TEST
START_TEST(test_french_charref_decimal)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
run_character_check(text,
@@ -290,7 +316,7 @@ END_TEST
START_TEST(test_french_latin1)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
run_character_check(text,
@@ -300,7 +326,7 @@ END_TEST
START_TEST(test_french_utf8)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='utf-8'?>\n"
"<doc>\xC3\xA9</doc>";
run_character_check(text, "\xC3\xA9");
@@ -314,7 +340,7 @@ END_TEST
*/
START_TEST(test_utf8_false_rejection)
{
- char *text = "<doc>\xEF\xBA\xBF</doc>";
+ const char *text = "<doc>\xEF\xBA\xBF</doc>";
run_character_check(text, "\xEF\xBA\xBF");
}
END_TEST
@@ -331,7 +357,7 @@ START_TEST(test_illegal_utf8)
for (i = 128; i <= 255; ++i) {
sprintf(text, "<e>%ccd</e>", i);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
sprintf(text,
"expected token error for '%c' (ordinal %d) in UTF-8 text",
i, i);
@@ -345,6 +371,68 @@ START_TEST(test_illegal_utf8)
}
END_TEST
+
+/* Examples, not masks: */
+#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
+#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
+#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
+#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
+#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
+
+START_TEST(test_utf8_auto_align)
+{
+ struct TestCase {
+ ptrdiff_t expectedMovementInChars;
+ const char * input;
+ };
+
+ struct TestCase cases[] = {
+ {00, ""},
+
+ {00, UTF8_LEAD_1},
+
+ {-1, UTF8_LEAD_2},
+ {00, UTF8_LEAD_2 UTF8_FOLLOW},
+
+ {-1, UTF8_LEAD_3},
+ {-2, UTF8_LEAD_3 UTF8_FOLLOW},
+ {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
+
+ {-1, UTF8_LEAD_4},
+ {-2, UTF8_LEAD_4 UTF8_FOLLOW},
+ {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
+ {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
+ };
+
+ size_t i = 0;
+ bool success = true;
+ for (; i < sizeof(cases) / sizeof(*cases); i++) {
+ const char * fromLim = cases[i].input + strlen(cases[i].input);
+ const char * const fromLimInitially = fromLim;
+ ptrdiff_t actualMovementInChars;
+
+ align_limit_to_full_utf8_characters(cases[i].input, &fromLim);
+
+ actualMovementInChars = (fromLim - fromLimInitially);
+ if (actualMovementInChars != cases[i].expectedMovementInChars) {
+ size_t j = 0;
+ success = false;
+ printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars"
+ ", actually moved by %2ld chars: \"",
+ i + 1, cases[i].expectedMovementInChars, actualMovementInChars);
+ for (; j < strlen(cases[i].input); j++) {
+ printf("\\x%02x", (unsigned char)cases[i].input[j]);
+ }
+ printf("\"\n");
+ }
+ }
+
+ if (! success) {
+ fail("UTF-8 auto-alignment is not bullet-proof\n");
+ }
+}
+END_TEST
+
START_TEST(test_utf16)
{
/* <?xml version="1.0" encoding="UTF-16"?>
@@ -358,7 +446,7 @@ START_TEST(test_utf16)
"\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
"\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
"\000d\000o\000c\000>";
- if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -373,12 +461,12 @@ START_TEST(test_utf16_le_epilog_newline)
if (first_chunk_bytes >= sizeof(text) - 1)
fail("bad value of first_chunk_bytes");
- if ( XML_Parse(parser, text, first_chunk_bytes, XML_FALSE)
+ if ( _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(parser);
else {
enum XML_Status rc;
- rc = XML_Parse(parser, text + first_chunk_bytes,
+ rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
if (rc == XML_STATUS_ERROR)
xml_failure(parser);
@@ -389,11 +477,11 @@ END_TEST
/* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
" >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
- char *utf8 =
+ const char *utf8 =
"\xC3\xA4 \xC3\xB6 \xC3\xBC "
"\xC3\xA4 \xC3\xB6 \xC3\xBC "
"\xC3\xA4 \xC3\xB6 \xC3\xBC >";
@@ -406,13 +494,13 @@ END_TEST
/* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)
{
- char *text =
+ const char *text =
"<tag>\n"
"\n"
"\n</tag>";
XML_Size lineno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
lineno = XML_GetCurrentLineNumber(parser);
if (lineno != 4) {
@@ -427,10 +515,10 @@ END_TEST
/* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)
{
- char *text = "<tag></tag>";
+ const char *text = "<tag></tag>";
XML_Size colno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
colno = XML_GetCurrentColumnNumber(parser);
if (colno != 11) {
@@ -444,7 +532,7 @@ END_TEST
static void XMLCALL
start_element_event_handler2(void *userData, const XML_Char *name,
- const XML_Char **attr)
+ const XML_Char **UNUSED_P(attr))
{
CharData *storage = (CharData *) userData;
char buffer[100];
@@ -474,7 +562,7 @@ end_element_event_handler2(void *userData, const XML_Char *name)
/* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)
{
- char *text =
+ const char *text =
"<a>\n" /* Unix end-of-line */
" <b>\r\n" /* Windows end-of-line */
" <c/>\r" /* Mac OS end-of-line */
@@ -483,7 +571,7 @@ START_TEST(test_line_and_column_numbers_inside_handlers)
" <f/>\n"
" </d>\n"
"</a>";
- char *expected =
+ const char *expected =
"<a> at col:0 line:1\n"
"<b> at col:2 line:2\n"
"<c> at col:4 line:3\n"
@@ -500,7 +588,7 @@ START_TEST(test_line_and_column_numbers_inside_handlers)
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, start_element_event_handler2);
XML_SetEndElementHandler(parser, end_element_event_handler2);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, expected);
@@ -510,12 +598,12 @@ END_TEST
/* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)
{
- char *text =
+ const char *text =
"<a>\n"
" <b>\n"
" </a>"; /* missing </b> */
XML_Size lineno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
fail("Expected a parse error");
lineno = XML_GetCurrentLineNumber(parser);
@@ -530,12 +618,12 @@ END_TEST
/* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)
{
- char *text =
+ const char *text =
"<a>\n"
" <b>\n"
" </a>"; /* missing </b> */
XML_Size colno;
- if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
fail("Expected a parse error");
colno = XML_GetCurrentColumnNumber(parser);
@@ -556,7 +644,7 @@ START_TEST(test_really_long_lines)
really cheesy approach to building the input buffer, because
this avoids writing bugs in buffer-filling code.
*/
- char *text =
+ const char *text =
"<e>"
/* 64 chars */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
@@ -578,7 +666,7 @@ START_TEST(test_really_long_lines)
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"</e>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -598,14 +686,14 @@ end_element_event_handler(void *userData, const XML_Char *name)
START_TEST(test_end_element_events)
{
- char *text = "<a><b><c/></b><d><f/></d></a>";
- char *expected = "/c/b/f/d/a";
+ const char *text = "<a><b><c/></b><d><f/></d></a>";
+ const char *expected = "/c/b/f/d/a";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetEndElementHandler(parser, end_element_event_handler);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, expected);
}
@@ -679,8 +767,8 @@ testhelper_is_whitespace_normalized(void)
}
static void XMLCALL
-check_attr_contains_normalized_whitespace(void *userData,
- const XML_Char *name,
+check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name),
const XML_Char **atts)
{
int i;
@@ -702,7 +790,7 @@ check_attr_contains_normalized_whitespace(void *userData,
START_TEST(test_attr_whitespace_normalization)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ATTLIST doc\n"
" attr NMTOKENS #REQUIRED\n"
@@ -718,7 +806,7 @@ START_TEST(test_attr_whitespace_normalization)
XML_SetStartElementHandler(parser,
check_attr_contains_normalized_whitespace);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -740,7 +828,7 @@ END_TEST
/* Regression test for SF bug #584832. */
static int XMLCALL
-UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
+UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
{
if (strcmp(encoding,"unsupported-encoding") == 0) {
int i;
@@ -756,13 +844,13 @@ UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
START_TEST(test_unknown_encoding_internal_entity)
{
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='unsupported-encoding'?>\n"
"<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
"<test a='&foo;'/>";
XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -771,14 +859,14 @@ END_TEST
static int XMLCALL
external_entity_loader_set_encoding(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
/* This text says it's an unsupported encoding, but it's really
UTF-8, which we tell Expat using XML_SetEncoding().
*/
- char *text =
+ const char *text =
"<?xml encoding='iso-8859-3'?>"
"\xC3\xA9";
XML_Parser extparser;
@@ -788,7 +876,7 @@ external_entity_loader_set_encoding(XML_Parser parser,
fail("Could not create external entity parser.");
if (!XML_SetEncoding(extparser, "utf-8"))
fail("XML_SetEncoding() ignored for external entity");
- if ( XML_Parse(extparser, text, strlen(text), XML_TRUE)
+ if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
== XML_STATUS_ERROR) {
xml_failure(parser);
return 0;
@@ -798,7 +886,7 @@ external_entity_loader_set_encoding(XML_Parser parser,
START_TEST(test_ext_entity_set_encoding)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
"]>\n"
@@ -814,11 +902,11 @@ END_TEST
read an external subset. This was fixed in Expat 1.95.5.
*/
START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
- char *text =
+ const char *text =
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -837,7 +925,7 @@ END_TEST
read an external subset, but have been declared standalone.
*/
START_TEST(test_wfc_undeclared_entity_standalone) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
@@ -851,9 +939,9 @@ END_TEST
static int XMLCALL
external_entity_loader(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
char *text = (char *)XML_GetUserData(parser);
XML_Parser extparser;
@@ -861,7 +949,7 @@ external_entity_loader(XML_Parser parser,
extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
if (extparser == NULL)
fail("Could not create external entity parser.");
- if ( XML_Parse(extparser, text, strlen(text), XML_TRUE)
+ if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
== XML_STATUS_ERROR) {
xml_failure(parser);
return XML_STATUS_ERROR;
@@ -873,11 +961,11 @@ external_entity_loader(XML_Parser parser,
an external subset, and standalone is true.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- char *foo_text =
+ char foo_text[] =
"<!ELEMENT doc (#PCDATA)*>";
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -893,24 +981,24 @@ END_TEST
an external subset, and standalone is false.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset) {
- char *text =
+ const char *text =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
- char *foo_text =
+ char foo_text[] =
"<!ELEMENT doc (#PCDATA)*>";
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetUserData(parser, foo_text);
XML_SetExternalEntityRefHandler(parser, external_entity_loader);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_wfc_no_recursive_entity_refs)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ENTITY entity '&#38;entity;'>\n"
"]>\n"
@@ -925,7 +1013,7 @@ END_TEST
/* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
"<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
"<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
@@ -957,12 +1045,12 @@ END_TEST
*/
START_TEST(test_empty_ns_without_namespaces)
{
- char *text =
+ const char *text =
"<doc xmlns:prefix='http://www.example.com/'>\n"
" <e xmlns:prefix=''/>\n"
"</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -973,19 +1061,19 @@ END_TEST
*/
START_TEST(test_ns_in_attribute_default_without_namespaces)
{
- char *text =
+ const char *text =
"<!DOCTYPE e:element [\n"
" <!ATTLIST e:element\n"
" xmlns:e CDATA 'http://example.com/'>\n"
" ]>\n"
"<e:element/>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
-static char *long_character_data_text =
+static const char *long_character_data_text =
"<?xml version='1.0' encoding='iso-8859-1'?><s>"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
@@ -1012,8 +1100,8 @@ static char *long_character_data_text =
static XML_Bool resumable = XML_FALSE;
static void
-clearing_aborting_character_handler(void *userData,
- const XML_Char *s, int len)
+clearing_aborting_character_handler(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
@@ -1029,11 +1117,11 @@ START_TEST(test_stop_parser_between_char_data_calls)
handler must stop the parser and clear the character data
handler.
*/
- char *text = long_character_data_text;
+ const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_FALSE;
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
xml_failure(parser);
@@ -1050,17 +1138,112 @@ START_TEST(test_suspend_parser_between_char_data_calls)
handler must stop the parser and clear the character data
handler.
*/
- char *text = long_character_data_text;
+ const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_TRUE;
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
xml_failure(parser);
}
END_TEST
+START_TEST(test_good_cdata_ascii)
+{
+ const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
+ const char *expected = "<greeting>Hello, world!</greeting>";
+
+ CharData storage;
+ CharData_Init(&storage);
+ XML_SetUserData(parser, &storage);
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ xml_failure(parser);
+ CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+START_TEST(test_good_cdata_utf16)
+{
+ /* Test data is:
+ * <?xml version='1.0' encoding='utf-16'?>
+ * <a><![CDATA[hello]]></a>
+ */
+ const char text[] =
+ "\0<\0?\0x\0m\0l\0"
+ " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
+ " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
+ "\0?\0>\0\n"
+ "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
+ const char *expected = "hello";
+
+ CharData storage;
+ CharData_Init(&storage);
+ XML_SetUserData(parser, &storage);
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
+ xml_failure(parser);
+ CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+START_TEST(test_bad_cdata)
+{
+ struct CaseData {
+ const char *text;
+ enum XML_Error expectedError;
+ };
+
+ struct CaseData cases[] = {
+ {"<a><", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
+ {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
+
+ {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
+ {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+ {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+
+ {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
+ {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+ {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+ {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
+ {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
+ {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
+ {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
+
+ {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+ {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+ {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
+ };
+
+ size_t i = 0;
+ for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
+ const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
+ parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
+ const enum XML_Error actualError = XML_GetErrorCode(parser);
+
+ assert(actualStatus == XML_STATUS_ERROR);
+
+ if (actualError != cases[i].expectedError) {
+ char message[100];
+ sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
+ cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
+ fail(message);
+ }
+
+ XML_ParserReset(parser, NULL);
+ }
+}
+END_TEST
+
/*
* Namespaces tests.
@@ -1118,17 +1301,17 @@ triplet_end_checker(void *userData, const XML_Char *name)
START_TEST(test_return_ns_triplet)
{
- char *text =
+ const char *text =
"<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
" xmlns:bar='http://expat.sf.net/'></foo:e>";
- char *elemstr[] = {
+ const char *elemstr[] = {
"http://expat.sf.net/ e foo",
"http://expat.sf.net/ a bar"
};
XML_SetReturnNSTriplet(parser, XML_TRUE);
XML_SetUserData(parser, elemstr);
XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1158,14 +1341,14 @@ overwrite_end_checker(void *userData, const XML_Char *name)
}
static void
-run_ns_tagname_overwrite_test(char *text, char *result)
+run_ns_tagname_overwrite_test(const char *text, const char *result)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetElementHandler(parser,
overwrite_start_checker, overwrite_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, result);
}
@@ -1173,12 +1356,12 @@ run_ns_tagname_overwrite_test(char *text, char *result)
/* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite)
{
- char *text =
+ const char *text =
"<n:e xmlns:n='http://xml.libexpat.org/'>\n"
" <n:f n:attr='foo'/>\n"
" <n:g n:attr2='bar'/>\n"
"</n:e>";
- char *result =
+ const char *result =
"start http://xml.libexpat.org/ e\n"
"start http://xml.libexpat.org/ f\n"
"attribute http://xml.libexpat.org/ attr\n"
@@ -1194,12 +1377,12 @@ END_TEST
/* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite_triplet)
{
- char *text =
+ const char *text =
"<n:e xmlns:n='http://xml.libexpat.org/'>\n"
" <n:f n:attr='foo'/>\n"
" <n:g n:attr2='bar'/>\n"
"</n:e>";
- char *result =
+ const char *result =
"start http://xml.libexpat.org/ e n\n"
"start http://xml.libexpat.org/ f n\n"
"attribute http://xml.libexpat.org/ attr n\n"
@@ -1216,8 +1399,8 @@ END_TEST
/* Regression test for SF bug #620343. */
static void XMLCALL
-start_element_fail(void *userData,
- const XML_Char *name, const XML_Char **atts)
+start_element_fail(void *UNUSED_P(userData),
+ const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
{
/* We should never get here. */
fail("should never reach start_element_fail()");
@@ -1225,8 +1408,8 @@ start_element_fail(void *userData,
static void XMLCALL
start_ns_clearing_start_element(void *userData,
- const XML_Char *prefix,
- const XML_Char *uri)
+ const XML_Char *UNUSED_P(prefix),
+ const XML_Char *UNUSED_P(uri))
{
XML_SetStartElementHandler((XML_Parser) userData, NULL);
}
@@ -1237,12 +1420,12 @@ START_TEST(test_start_ns_clears_start_element)
syntax doesn't cause the problematic path through Expat to be
taken.
*/
- char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
+ const char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
XML_SetStartElementHandler(parser, start_element_fail);
XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
XML_UseParserAsHandlerArg(parser);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1251,12 +1434,12 @@ END_TEST
static int XMLCALL
external_entity_handler(XML_Parser parser,
const XML_Char *context,
- const XML_Char *base,
- const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(base),
+ const XML_Char *UNUSED_P(systemId),
+ const XML_Char *UNUSED_P(publicId))
{
intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
- char *text;
+ const char *text;
XML_Parser p2;
if (callno == 1)
@@ -1269,7 +1452,7 @@ external_entity_handler(XML_Parser parser,
XML_SetUserData(parser, (void *) callno);
p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
- if (XML_Parse(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
+ if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
xml_failure(p2);
return 0;
}
@@ -1279,7 +1462,7 @@ external_entity_handler(XML_Parser parser,
START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
{
- char *text =
+ const char *text =
"<?xml version='1.0'?>\n"
"<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
" <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
@@ -1293,7 +1476,7 @@ START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
/* We actually need to set this handler to tickle this bug. */
XML_SetStartElementHandler(parser, dummy_start_element);
XML_SetUserData(parser, NULL);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1301,7 +1484,7 @@ END_TEST
/* Regression test #1 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_1)
{
- char *text =
+ const char *text =
"<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
" <e xmlns:prefix=''/>\n"
"</doc>";
@@ -1316,7 +1499,7 @@ END_TEST
/* Regression test #2 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_2)
{
- char *text =
+ const char *text =
"<?xml version='1.0'?>\n"
"<docelem xmlns:pre=''/>";
@@ -1329,7 +1512,7 @@ END_TEST
/* Regression test #3 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_3)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ELEMENT doc EMPTY>\n"
" <!ATTLIST doc\n"
@@ -1346,7 +1529,7 @@ END_TEST
/* Regression test #4 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_4)
{
- char *text =
+ const char *text =
"<!DOCTYPE doc [\n"
" <!ELEMENT prefix:doc EMPTY>\n"
" <!ATTLIST prefix:doc\n"
@@ -1356,24 +1539,24 @@ START_TEST(test_ns_prefix_with_empty_uri_4)
/* Packaged info expected by the end element handler;
the weird structuring lets us re-use the triplet_end_checker()
function also used for another test. */
- char *elemstr[] = {
+ const char *elemstr[] = {
"http://xml.libexpat.org/ doc prefix"
};
XML_SetReturnNSTriplet(parser, XML_TRUE);
XML_SetUserData(parser, elemstr);
XML_SetEndElementHandler(parser, triplet_end_checker);
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_ns_default_with_empty_uri)
{
- char *text =
+ const char *text =
"<doc xmlns='http://xml.libexpat.org/'>\n"
" <e xmlns=''/>\n"
"</doc>";
- if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+ if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
@@ -1381,7 +1564,7 @@ END_TEST
/* Regression test for SF bug #692964: two prefixes for one namespace. */
START_TEST(test_ns_duplicate_attrs_diff_prefixes)
{
- char *text =
+ const char *text =
"<doc xmlns:a='http://xml.libexpat.org/a'\n"
" xmlns:b='http://xml.libexpat.org/a'\n"
" a:a='v' b:a='v' />";
@@ -1394,7 +1577,7 @@ END_TEST
/* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_attribute)
{
- char *text = "<doc a:attr=''/>";
+ const char *text = "<doc a:attr=''/>";
expect_failure(text,
XML_ERROR_UNBOUND_PREFIX,
"did not report unbound prefix on attribute");
@@ -1404,7 +1587,7 @@ END_TEST
/* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_element)
{
- char *text = "<a:doc/>";
+ const char *text = "<a:doc/>";
expect_failure(text,
XML_ERROR_UNBOUND_PREFIX,
"did not report unbound prefix on element");
@@ -1426,6 +1609,7 @@ make_suite(void)
tcase_add_test(tc_basic, test_bom_utf16_be);
tcase_add_test(tc_basic, test_bom_utf16_le);
tcase_add_test(tc_basic, test_illegal_utf8);
+ tcase_add_test(tc_basic, test_utf8_auto_align);
tcase_add_test(tc_basic, test_utf16);
tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
tcase_add_test(tc_basic, test_latin1_umlauts);
@@ -1461,6 +1645,9 @@ make_suite(void)
tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
+ tcase_add_test(tc_basic, test_good_cdata_ascii);
+ tcase_add_test(tc_basic, test_good_cdata_utf16);
+ tcase_add_test(tc_basic, test_bad_cdata);
suite_add_tcase(s, tc_namespace);
tcase_add_checked_fixture(tc_namespace,
diff --git a/tests/xmltest.sh b/tests/xmltest.sh
index 793a5cc2819b..64a17eec06f5 100755
--- a/tests/xmltest.sh
+++ b/tests/xmltest.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#! /usr/bin/env bash
# EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
@@ -20,12 +20,14 @@
# produced by xmlwf conforms to an older definition of canonical XML
# and does not generate notation declarations.
+shopt -s nullglob
+
MYDIR="`dirname \"$0\"`"
cd "$MYDIR"
MYDIR="`pwd`"
XMLWF="`dirname \"$MYDIR\"`/xmlwf/xmlwf"
# XMLWF=/usr/local/bin/xmlwf
-TS="$MYDIR/XML-Test-Suite"
+TS="$MYDIR"
# OUTPUT must terminate with the directory separator.
OUTPUT="$TS/out/"
# OUTPUT=/home/tmp/xml-testsuite-out/
@@ -100,7 +102,7 @@ for xmldir in ibm/valid/P* \
RunXmlwfWF "$xmlfile" "$xmldir/"
UpdateStatus $?
done
- rm outfile
+ rm -f outfile
done
cd "$TS/xmlconf/oasis"
diff --git a/xmlwf/codepage.c b/xmlwf/codepage.c
index 57e48ff2d12e..91bd15e96f8b 100755
--- a/xmlwf/codepage.c
+++ b/xmlwf/codepage.c
@@ -3,6 +3,7 @@
*/
#include "codepage.h"
+#include "internal.h" /* for UNUSED_P only */
#if (defined(WIN32) || (defined(__WATCOMC__) && defined(__NT__)))
#define STRICT 1
@@ -54,13 +55,13 @@ codepageConvert(int cp, const char *p)
#else /* not WIN32 */
int
-codepageMap(int cp, int *map)
+codepageMap(int UNUSED_P(cp), int *UNUSED_P(map))
{
return 0;
}
int
-codepageConvert(int cp, const char *p)
+codepageConvert(int UNUSED_P(cp), const char *UNUSED_P(p))
{
return -1;
}
diff --git a/xmlwf/readfilemap.c b/xmlwf/readfilemap.c
index bd32b9341471..d816b263aae1 100755
--- a/xmlwf/readfilemap.c
+++ b/xmlwf/readfilemap.c
@@ -8,16 +8,17 @@
#include <stdlib.h>
#include <stdio.h>
+/* Functions close(2) and read(2) */
#ifdef __WATCOMC__
#ifndef __LINUX__
#include <io.h>
#else
#include <unistd.h>
#endif
-#endif
-
-#ifdef __BEOS__
-#include <unistd.h>
+#else
+# if !defined(WIN32) && !defined(_WIN32) && !defined(_WIN64)
+# include <unistd.h>
+# endif
#endif
#ifndef S_ISREG
diff --git a/xmlwf/unixfilemap.c b/xmlwf/unixfilemap.c
index 93adce32e826..e13299da05fc 100755
--- a/xmlwf/unixfilemap.c
+++ b/xmlwf/unixfilemap.c
@@ -51,7 +51,7 @@ filemap(const char *name,
close(fd);
return 1;
}
- p = (void *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ,
+ p = (void *)mmap((void *)0, (size_t)nbytes, PROT_READ,
MAP_FILE|MAP_PRIVATE, fd, (off_t)0);
if (p == (void *)-1) {
perror(name);
@@ -59,7 +59,7 @@ filemap(const char *name,
return 0;
}
processor(p, nbytes, name, arg);
- munmap((caddr_t)p, nbytes);
+ munmap((void *)p, nbytes);
close(fd);
return 1;
}
diff --git a/xmlwf/xmlfile.c b/xmlwf/xmlfile.c
index 99eeeaaef28d..2f769aa894cf 100755
--- a/xmlwf/xmlfile.c
+++ b/xmlwf/xmlfile.c
@@ -8,7 +8,7 @@
#include <string.h>
#include <fcntl.h>
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -18,9 +18,10 @@
#include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "expat.h"
+#include "internal.h" /* for UNUSED_P only */
#include "xmlfile.h"
#include "xmltchar.h"
#include "filemap.h"
@@ -132,7 +133,7 @@ externalEntityRefFilemap(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(publicId))
{
int result;
XML_Char *s;
@@ -200,7 +201,7 @@ externalEntityRefStream(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
- const XML_Char *publicId)
+ const XML_Char *UNUSED_P(publicId))
{
XML_Char *s;
const XML_Char *filename;
diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c
index 4fc77da94419..66d6c9ef4b71 100755
--- a/xmlwf/xmlwf.c
+++ b/xmlwf/xmlwf.c
@@ -9,6 +9,7 @@
#include "expat.h"
#include "codepage.h"
+#include "internal.h" /* for UNUSED_P only */
#include "xmlfile.h"
#include "xmltchar.h"
@@ -248,49 +249,49 @@ processingInstruction(void *userData, const XML_Char *target,
#endif /* not W3C14N */
static void XMLCALL
-defaultCharacterData(void *userData, const XML_Char *s, int len)
+defaultCharacterData(void *userData, const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
XML_DefaultCurrent((XML_Parser) userData);
}
static void XMLCALL
-defaultStartElement(void *userData, const XML_Char *name,
- const XML_Char **atts)
+defaultStartElement(void *userData, const XML_Char *UNUSED_P(name),
+ const XML_Char **UNUSED_P(atts))
{
XML_DefaultCurrent((XML_Parser) userData);
}
static void XMLCALL
-defaultEndElement(void *userData, const XML_Char *name)
+defaultEndElement(void *userData, const XML_Char *UNUSED_P(name))
{
XML_DefaultCurrent((XML_Parser) userData);
}
static void XMLCALL
-defaultProcessingInstruction(void *userData, const XML_Char *target,
- const XML_Char *data)
+defaultProcessingInstruction(void *userData, const XML_Char *UNUSED_P(target),
+ const XML_Char *UNUSED_P(data))
{
XML_DefaultCurrent((XML_Parser) userData);
}
static void XMLCALL
-nopCharacterData(void *userData, const XML_Char *s, int len)
+nopCharacterData(void *UNUSED_P(userData), const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
}
static void XMLCALL
-nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts)
+nopStartElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
{
}
static void XMLCALL
-nopEndElement(void *userData, const XML_Char *name)
+nopEndElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name))
{
}
static void XMLCALL
-nopProcessingInstruction(void *userData, const XML_Char *target,
- const XML_Char *data)
+nopProcessingInstruction(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target),
+ const XML_Char *UNUSED_P(data))
{
}
@@ -434,9 +435,9 @@ metaCharacterData(void *userData, const XML_Char *s, int len)
static void XMLCALL
metaStartDoctypeDecl(void *userData,
const XML_Char *doctypeName,
- const XML_Char *sysid,
- const XML_Char *pubid,
- int has_internal_subset)
+ const XML_Char *UNUSED_P(sysid),
+ const XML_Char *UNUSED_P(pubid),
+ int UNUSED_P(has_internal_subset))
{
XML_Parser parser = (XML_Parser) userData;
FILE *fp = (FILE *)XML_GetUserData(parser);
@@ -458,7 +459,7 @@ metaEndDoctypeDecl(void *userData)
static void XMLCALL
metaNotationDecl(void *userData,
const XML_Char *notationName,
- const XML_Char *base,
+ const XML_Char *UNUSED_P(base),
const XML_Char *systemId,
const XML_Char *publicId)
{
@@ -480,10 +481,10 @@ metaNotationDecl(void *userData,
static void XMLCALL
metaEntityDecl(void *userData,
const XML_Char *entityName,
- int is_param,
+ int UNUSED_P(is_param),
const XML_Char *value,
int value_length,
- const XML_Char *base,
+ const XML_Char *UNUSED_P(base),
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName)
@@ -558,7 +559,7 @@ unknownEncodingConvert(void *data, const char *p)
}
static int XMLCALL
-unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info)
+unknownEncoding(void *UNUSED_P(userData), const XML_Char *name, XML_Encoding *info)
{
int cp;
static const XML_Char prefixL[] = T("windows-");
@@ -594,7 +595,7 @@ unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info)
}
static int XMLCALL
-notStandalone(void *userData)
+notStandalone(void *UNUSED_P(userData))
{
return 0;
}
@@ -634,8 +635,7 @@ static void
usage(const XML_Char *prog, int rc)
{
ftprintf(stderr,
- T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] "
- "[-e encoding] file ...\n"), prog);
+ T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [file ...]\n"), prog);
exit(rc);
}
@@ -760,6 +760,12 @@ tmain(int argc, XML_Char **argv)
parser = XML_ParserCreateNS(encoding, NSSEP);
else
parser = XML_ParserCreate(encoding);
+
+ if (! parser) {
+ tperror("Could not instantiate parser");
+ exit(1);
+ }
+
if (requireStandalone)
XML_SetNotStandaloneHandler(parser, notStandalone);
XML_SetParamEntityParsing(parser, paramEntityParsing);