about summary refs log tree commit diff
path: root/contrib/expat/xmlwf/xmlwf.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/expat/xmlwf/xmlwf.c')
-rw-r--r-- contrib/expat/xmlwf/xmlwf.c 159
1 files changed, 109 insertions, 50 deletions
diff --git a/contrib/expat/xmlwf/xmlwf.c b/contrib/expat/xmlwf/xmlwf.c
index b0cd212f78ae..7c0a8cd4d6a4 100644
--- a/contrib/expat/xmlwf/xmlwf.c
+++ b/contrib/expat/xmlwf/xmlwf.c
@@ -11,12 +11,14 @@
Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
- Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Copyright (c) 2020 Joe Orton <jorton@redhat.com>
Copyright (c) 2020 Kleber Tarcísio <klebertarcisio@yahoo.com.br>
Copyright (c) 2021 Tim Bray <tbray@textuality.com>
+ Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
+ Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -39,7 +41,7 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <expat_config.h>
+#include "expat_config.h"
#include <assert.h>
#include <stdio.h>
@@ -177,7 +179,7 @@ is equivalent to lexicographically comparing based on the character number. */
static int
attcmp(const void *att1, const void *att2) {
- return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
+ return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
}
static void XMLCALL
@@ -214,10 +216,10 @@ endElement(void *userData, const XML_Char *name) {
static int
nsattcmp(const void *p1, const void *p2) {
- const XML_Char *att1 = *(const XML_Char **)p1;
- const XML_Char *att2 = *(const XML_Char **)p2;
+ const XML_Char *att1 = *(const XML_Char *const *)p1;
+ const XML_Char *att2 = *(const XML_Char *const *)p2;
int sep1 = (tcsrchr(att1, NSSEP) != 0);
- int sep2 = (tcsrchr(att1, NSSEP) != 0);
+ int sep2 = (tcsrchr(att2, NSSEP) != 0);
if (sep1 != sep2)
return sep1 - sep2;
return tcscmp(att1, att2);
@@ -369,8 +371,8 @@ xcscmp(const XML_Char *xs, const XML_Char *xt) {
static int
notationCmp(const void *a, const void *b) {
- const NotationList *const n1 = *(NotationList **)a;
- const NotationList *const n2 = *(NotationList **)b;
+ const NotationList *const n1 = *(const NotationList *const *)a;
+ const NotationList *const n2 = *(const NotationList *const *)b;
return xcscmp(n1->notationName, n2->notationName);
}
@@ -870,6 +872,9 @@ showVersion(XML_Char *prog) {
}
}
+#if defined(__GNUC__)
+__attribute__((noreturn))
+#endif
static void
usage(const XML_Char *prog, int rc) {
ftprintf(
@@ -882,50 +887,54 @@ usage(const XML_Char *prog, int rc) {
/* clang-format off */
T("usage:\n")
T(" %s [OPTIONS] [FILE ...]\n")
- T(" %s -h\n")
- T(" %s -v\n")
+ T(" %s -h|--help\n")
+ T(" %s -v|--version\n")
T("\n")
T("xmlwf - Determines if an XML document is well-formed\n")
T("\n")
T("positional arguments:\n")
- T(" FILE file to process (default: STDIN)\n")
+ T(" FILE file to process (default: STDIN)\n")
T("\n")
T("input control arguments:\n")
- T(" -s print an error if the document is not [s]tandalone\n")
- T(" -n enable [n]amespace processing\n")
- T(" -p enable processing external DTDs and [p]arameter entities\n")
- T(" -x enable processing of e[x]ternal entities\n")
- T(" -e ENCODING override any in-document [e]ncoding declaration\n")
- T(" -w enable support for [W]indows code pages\n")
- T(" -r disable memory-mapping and use normal file [r]ead IO calls instead\n")
- T(" -k when processing multiple files, [k]eep processing after first file with error\n")
+ T(" -s print an error if the document is not [s]tandalone\n")
+ T(" -n enable [n]amespace processing\n")
+ T(" -p enable processing of external DTDs and [p]arameter entities\n")
+ T(" -x enable processing of e[x]ternal entities\n")
+ T(" -e ENCODING override any in-document [e]ncoding declaration\n")
+ T(" -w enable support for [W]indows code pages\n")
+ T(" -r disable memory-mapping and use [r]ead calls instead\n")
+ T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
+ T(" -k when processing multiple files, [k]eep processing after first file with error\n")
T("\n")
T("output control arguments:\n")
- T(" -d DIRECTORY output [d]estination directory\n")
- T(" -c write a [c]opy of input XML, not canonical XML\n")
- T(" -m write [m]eta XML, not canonical XML\n")
- T(" -t write no XML output for [t]iming of plain parsing\n")
- T(" -N enable adding doctype and [n]otation declarations\n")
+ T(" -d DIRECTORY output [d]estination directory\n")
+ T(" -c write a [c]opy of input XML, not canonical XML\n")
+ T(" -m write [m]eta XML, not canonical XML\n")
+ T(" -t write no XML output for [t]iming of plain parsing\n")
+ T(" -N enable adding doctype and [n]otation declarations\n")
T("\n")
T("billion laughs attack protection:\n")
T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
T("\n")
- T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
- T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
+ T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
+ T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
+ T("\n")
+ T("reparse deferral:\n")
+ T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
T("\n")
T("info arguments:\n")
- T(" -h show this [h]elp message and exit\n")
- T(" -v show program's [v]ersion number and exit\n")
+ T(" -h, --help show this [h]elp message and exit\n")
+ T(" -v, --version show program's [v]ersion number and exit\n")
T("\n")
T("exit status:\n")
- T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
- T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
- T(" 2 one or more input files were not well-formed\n")
- T(" 3 could not create an output file\n")
- T(" 4 command-line argument error\n")
+ T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
+ T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
+ T(" 2 one or more input files were not well-formed\n")
+ T(" 3 could not create an output file\n")
+ T(" 4 command-line argument error\n")
T("\n")
T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
- T("Please report bugs at https://github.com/libexpat/libexpat/issues. Thank you!\n")
+ T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
, /* clang-format on */
prog, prog, prog);
exit(rc);
@@ -939,8 +948,10 @@ int wmain(int argc, XML_Char **argv);
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j) \
{ \
if (argv[i][j + 1] == T('\0')) { \
- if (++i == argc) \
+ if (++i == argc) { \
usage(argv[0], XMLWF_EXIT_USAGE_ERROR); \
+ /* usage called exit(..), never gets here */ \
+ } \
constCharStarTarget = argv[i]; \
} else { \
constCharStarTarget = argv[i] + j + 1; \
@@ -963,9 +974,11 @@ tmain(int argc, XML_Char **argv) {
int continueOnError = 0;
float attackMaximumAmplification = -1.0f; /* signaling "not set" */
- unsigned long long attackThresholdBytes;
+ unsigned long long attackThresholdBytes = 0;
XML_Bool attackThresholdGiven = XML_FALSE;
+ XML_Bool disableDeferral = XML_FALSE;
+
int exitCode = XMLWF_EXIT_SUCCESS;
enum XML_ParamEntityParsing paramEntityParsing
= XML_PARAM_ENTITY_PARSING_NEVER;
@@ -982,9 +995,17 @@ tmain(int argc, XML_Char **argv) {
if (j == 0) {
if (argv[i][0] != T('-'))
break;
- if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
- i++;
- break;
+ if (argv[i][1] == T('-')) {
+ if (argv[i][2] == T('\0')) {
+ i++;
+ break;
+ } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
+ usage(argv[0], XMLWF_EXIT_SUCCESS);
+ // usage called exit(..), never gets here
+ } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
+ showVersion(argv[0]);
+ return XMLWF_EXIT_SUCCESS;
+ }
}
j++;
}
@@ -1037,10 +1058,30 @@ tmain(int argc, XML_Char **argv) {
break;
case T('h'):
usage(argv[0], XMLWF_EXIT_SUCCESS);
- return 0;
+ // usage called exit(..), never gets here
case T('v'):
showVersion(argv[0]);
- return 0;
+ return XMLWF_EXIT_SUCCESS;
+ case T('g'): {
+ const XML_Char *valueText = NULL;
+ XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
+
+ errno = 0;
+ XML_Char *afterValueText = (XML_Char *)valueText;
+ const long long read_size_bytes_candidate
+ = tcstoull(valueText, &afterValueText, 10);
+ if ((errno != 0) || (afterValueText[0] != T('\0'))
+ || (read_size_bytes_candidate < 1)
+ || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
+ // This prevents tperror(..) from reporting misleading "[..]: Success"
+ errno = ERANGE;
+ tperror(T("invalid buffer size") T(
+ " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
+ exit(XMLWF_EXIT_USAGE_ERROR);
+ }
+ g_read_size_bytes = (int)read_size_bytes_candidate;
+ break;
+ }
case T('k'):
continueOnError = 1;
j++;
@@ -1050,7 +1091,7 @@ tmain(int argc, XML_Char **argv) {
XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
errno = 0;
- XML_Char *afterValueText = (XML_Char *)valueText;
+ XML_Char *afterValueText = NULL;
attackMaximumAmplification = tcstof(valueText, &afterValueText);
if ((errno != 0) || (afterValueText[0] != T('\0'))
|| isnan(attackMaximumAmplification)
@@ -1061,9 +1102,10 @@ tmain(int argc, XML_Char **argv) {
" (needs a floating point number greater or equal than 1.0)"));
exit(XMLWF_EXIT_USAGE_ERROR);
}
-#ifndef XML_DTD
- ftprintf(stderr, T("Warning: Given amplification limit ignored") T(
- ", xmlwf has been compiled without DTD support.\n"));
+#if XML_GE == 0
+ ftprintf(stderr,
+ T("Warning: Given amplification limit ignored")
+ T(", xmlwf has been compiled without DTD/GE support.\n"));
#endif
break;
}
@@ -1082,12 +1124,18 @@ tmain(int argc, XML_Char **argv) {
exit(XMLWF_EXIT_USAGE_ERROR);
}
attackThresholdGiven = XML_TRUE;
-#ifndef XML_DTD
- ftprintf(stderr, T("Warning: Given attack threshold ignored") T(
- ", xmlwf has been compiled without DTD support.\n"));
+#if XML_GE == 0
+ ftprintf(stderr,
+ T("Warning: Given attack threshold ignored")
+ T(", xmlwf has been compiled without DTD/GE support.\n"));
#endif
break;
}
+ case T('q'): {
+ disableDeferral = XML_TRUE;
+ j++;
+ break;
+ }
case T('\0'):
if (j > 1) {
i++;
@@ -1097,6 +1145,7 @@ tmain(int argc, XML_Char **argv) {
/* fall through */
default:
usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
+ // usage called exit(..), never gets here
}
}
if (i == argc) {
@@ -1119,13 +1168,13 @@ tmain(int argc, XML_Char **argv) {
}
if (attackMaximumAmplification != -1.0f) {
-#ifdef XML_DTD
+#if XML_GE == 1
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parser, attackMaximumAmplification);
#endif
}
if (attackThresholdGiven) {
-#ifdef XML_DTD
+#if XML_GE == 1
XML_SetBillionLaughsAttackProtectionActivationThreshold(
parser, attackThresholdBytes);
#else
@@ -1133,6 +1182,16 @@ tmain(int argc, XML_Char **argv) {
#endif
}
+ if (disableDeferral) {
+ const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
+ if (! success) {
+ // This prevents tperror(..) from reporting misleading "[..]: Success"
+ errno = EINVAL;
+ tperror(T("Failed to disable reparse deferral"));
+ exit(XMLWF_EXIT_INTERNAL_ERROR);
+ }
+ }
+
if (requireStandalone)
XML_SetNotStandaloneHandler(parser, notStandalone);
XML_SetParamEntityParsing(parser, paramEntityParsing);