diff options
Diffstat (limited to 'contrib/expat/xmlwf/xmlwf.c')
-rw-r--r-- | contrib/expat/xmlwf/xmlwf.c | 159 |
1 files changed, 109 insertions, 50 deletions
diff --git a/contrib/expat/xmlwf/xmlwf.c b/contrib/expat/xmlwf/xmlwf.c index b0cd212f78ae..7c0a8cd4d6a4 100644 --- a/contrib/expat/xmlwf/xmlwf.c +++ b/contrib/expat/xmlwf/xmlwf.c @@ -11,12 +11,14 @@ Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> - Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> Copyright (c) 2020 Joe Orton <jorton@redhat.com> Copyright (c) 2020 Kleber Tarcísio <klebertarcisio@yahoo.com.br> Copyright (c) 2021 Tim Bray <tbray@textuality.com> + Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> + Copyright (c) 2022 Sean McBride <sean@rogue-research.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -39,7 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include <expat_config.h> +#include "expat_config.h" #include <assert.h> #include <stdio.h> @@ -177,7 +179,7 @@ is equivalent to lexicographically comparing based on the character number. 
*/ static int attcmp(const void *att1, const void *att2) { - return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); + return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2); } static void XMLCALL @@ -214,10 +216,10 @@ endElement(void *userData, const XML_Char *name) { static int nsattcmp(const void *p1, const void *p2) { - const XML_Char *att1 = *(const XML_Char **)p1; - const XML_Char *att2 = *(const XML_Char **)p2; + const XML_Char *att1 = *(const XML_Char *const *)p1; + const XML_Char *att2 = *(const XML_Char *const *)p2; int sep1 = (tcsrchr(att1, NSSEP) != 0); - int sep2 = (tcsrchr(att1, NSSEP) != 0); + int sep2 = (tcsrchr(att2, NSSEP) != 0); if (sep1 != sep2) return sep1 - sep2; return tcscmp(att1, att2); @@ -369,8 +371,8 @@ xcscmp(const XML_Char *xs, const XML_Char *xt) { static int notationCmp(const void *a, const void *b) { - const NotationList *const n1 = *(NotationList **)a; - const NotationList *const n2 = *(NotationList **)b; + const NotationList *const n1 = *(const NotationList *const *)a; + const NotationList *const n2 = *(const NotationList *const *)b; return xcscmp(n1->notationName, n2->notationName); } @@ -870,6 +872,9 @@ showVersion(XML_Char *prog) { } } +#if defined(__GNUC__) +__attribute__((noreturn)) +#endif static void usage(const XML_Char *prog, int rc) { ftprintf( @@ -882,50 +887,54 @@ usage(const XML_Char *prog, int rc) { /* clang-format off */ T("usage:\n") T(" %s [OPTIONS] [FILE ...]\n") - T(" %s -h\n") - T(" %s -v\n") + T(" %s -h|--help\n") + T(" %s -v|--version\n") T("\n") T("xmlwf - Determines if an XML document is well-formed\n") T("\n") T("positional arguments:\n") - T(" FILE file to process (default: STDIN)\n") + T(" FILE file to process (default: STDIN)\n") T("\n") T("input control arguments:\n") - T(" -s print an error if the document is not [s]tandalone\n") - T(" -n enable [n]amespace processing\n") - T(" -p enable processing external DTDs and [p]arameter entities\n") - T(" -x enable processing 
of e[x]ternal entities\n") - T(" -e ENCODING override any in-document [e]ncoding declaration\n") - T(" -w enable support for [W]indows code pages\n") - T(" -r disable memory-mapping and use normal file [r]ead IO calls instead\n") - T(" -k when processing multiple files, [k]eep processing after first file with error\n") + T(" -s print an error if the document is not [s]tandalone\n") + T(" -n enable [n]amespace processing\n") + T(" -p enable processing of external DTDs and [p]arameter entities\n") + T(" -x enable processing of e[x]ternal entities\n") + T(" -e ENCODING override any in-document [e]ncoding declaration\n") + T(" -w enable support for [W]indows code pages\n") + T(" -r disable memory-mapping and use [r]ead calls instead\n") + T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n") + T(" -k when processing multiple files, [k]eep processing after first file with error\n") T("\n") T("output control arguments:\n") - T(" -d DIRECTORY output [d]estination directory\n") - T(" -c write a [c]opy of input XML, not canonical XML\n") - T(" -m write [m]eta XML, not canonical XML\n") - T(" -t write no XML output for [t]iming of plain parsing\n") - T(" -N enable adding doctype and [n]otation declarations\n") + T(" -d DIRECTORY output [d]estination directory\n") + T(" -c write a [c]opy of input XML, not canonical XML\n") + T(" -m write [m]eta XML, not canonical XML\n") + T(" -t write no XML output for [t]iming of plain parsing\n") + T(" -N enable adding doctype and [n]otation declarations\n") T("\n") T("billion laughs attack protection:\n") T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n") T("\n") - T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") - T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") + T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") + T(" -b BYTES set number of 
output [b]ytes needed to activate (default: 8 MiB)\n") + T("\n") + T("reparse deferral:\n") + T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n") T("\n") T("info arguments:\n") - T(" -h show this [h]elp message and exit\n") - T(" -v show program's [v]ersion number and exit\n") + T(" -h, --help show this [h]elp message and exit\n") + T(" -v, --version show program's [v]ersion number and exit\n") T("\n") T("exit status:\n") - T(" 0 the input files are well-formed and the output (if requested) was written successfully\n") - T(" 1 could not allocate data structures, signals a serious problem with execution environment\n") - T(" 2 one or more input files were not well-formed\n") - T(" 3 could not create an output file\n") - T(" 4 command-line argument error\n") + T(" 0 the input files are well-formed and the output (if requested) was written successfully\n") + T(" 1 could not allocate data structures, signals a serious problem with execution environment\n") + T(" 2 one or more input files were not well-formed\n") + T(" 3 could not create an output file\n") + T(" 4 command-line argument error\n") T("\n") T("xmlwf of libexpat is software libre, licensed under the MIT license.\n") - T("Please report bugs at https://github.com/libexpat/libexpat/issues. 
Thank you!\n") + T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n") , /* clang-format on */ prog, prog, prog); exit(rc); @@ -939,8 +948,10 @@ int wmain(int argc, XML_Char **argv); #define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j) \ { \ if (argv[i][j + 1] == T('\0')) { \ - if (++i == argc) \ + if (++i == argc) { \ usage(argv[0], XMLWF_EXIT_USAGE_ERROR); \ + /* usage called exit(..), never gets here */ \ + } \ constCharStarTarget = argv[i]; \ } else { \ constCharStarTarget = argv[i] + j + 1; \ @@ -963,9 +974,11 @@ tmain(int argc, XML_Char **argv) { int continueOnError = 0; float attackMaximumAmplification = -1.0f; /* signaling "not set" */ - unsigned long long attackThresholdBytes; + unsigned long long attackThresholdBytes = 0; XML_Bool attackThresholdGiven = XML_FALSE; + XML_Bool disableDeferral = XML_FALSE; + int exitCode = XMLWF_EXIT_SUCCESS; enum XML_ParamEntityParsing paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; @@ -982,9 +995,17 @@ tmain(int argc, XML_Char **argv) { if (j == 0) { if (argv[i][0] != T('-')) break; - if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { - i++; - break; + if (argv[i][1] == T('-')) { + if (argv[i][2] == T('\0')) { + i++; + break; + } else if (tcscmp(argv[i] + 2, T("help")) == 0) { + usage(argv[0], XMLWF_EXIT_SUCCESS); + // usage called exit(..), never gets here + } else if (tcscmp(argv[i] + 2, T("version")) == 0) { + showVersion(argv[0]); + return XMLWF_EXIT_SUCCESS; + } } j++; } @@ -1037,10 +1058,30 @@ tmain(int argc, XML_Char **argv) { break; case T('h'): usage(argv[0], XMLWF_EXIT_SUCCESS); - return 0; + // usage called exit(..), never gets here case T('v'): showVersion(argv[0]); - return 0; + return XMLWF_EXIT_SUCCESS; + case T('g'): { + const XML_Char *valueText = NULL; + XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); + + errno = 0; + XML_Char *afterValueText = (XML_Char *)valueText; + const long long read_size_bytes_candidate + = tcstoull(valueText, 
&afterValueText, 10); + if ((errno != 0) || (afterValueText[0] != T('\0')) + || (read_size_bytes_candidate < 1) + || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) { + // This prevents tperror(..) from reporting misleading "[..]: Success" + errno = ERANGE; + tperror(T("invalid buffer size") T( + " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)")); + exit(XMLWF_EXIT_USAGE_ERROR); + } + g_read_size_bytes = (int)read_size_bytes_candidate; + break; + } case T('k'): continueOnError = 1; j++; @@ -1050,7 +1091,7 @@ tmain(int argc, XML_Char **argv) { XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); errno = 0; - XML_Char *afterValueText = (XML_Char *)valueText; + XML_Char *afterValueText = NULL; attackMaximumAmplification = tcstof(valueText, &afterValueText); if ((errno != 0) || (afterValueText[0] != T('\0')) || isnan(attackMaximumAmplification) @@ -1061,9 +1102,10 @@ tmain(int argc, XML_Char **argv) { " (needs a floating point number greater or equal than 1.0)")); exit(XMLWF_EXIT_USAGE_ERROR); } -#ifndef XML_DTD - ftprintf(stderr, T("Warning: Given amplification limit ignored") T( - ", xmlwf has been compiled without DTD support.\n")); +#if XML_GE == 0 + ftprintf(stderr, + T("Warning: Given amplification limit ignored") + T(", xmlwf has been compiled without DTD/GE support.\n")); #endif break; } @@ -1082,12 +1124,18 @@ tmain(int argc, XML_Char **argv) { exit(XMLWF_EXIT_USAGE_ERROR); } attackThresholdGiven = XML_TRUE; -#ifndef XML_DTD - ftprintf(stderr, T("Warning: Given attack threshold ignored") T( - ", xmlwf has been compiled without DTD support.\n")); +#if XML_GE == 0 + ftprintf(stderr, + T("Warning: Given attack threshold ignored") + T(", xmlwf has been compiled without DTD/GE support.\n")); #endif break; } + case T('q'): { + disableDeferral = XML_TRUE; + j++; + break; + } case T('\0'): if (j > 1) { i++; @@ -1097,6 +1145,7 @@ tmain(int argc, XML_Char **argv) { /* fall through */ default: usage(argv[0], XMLWF_EXIT_USAGE_ERROR); + // 
usage called exit(..), never gets here } } if (i == argc) { @@ -1119,13 +1168,13 @@ tmain(int argc, XML_Char **argv) { } if (attackMaximumAmplification != -1.0f) { -#ifdef XML_DTD +#if XML_GE == 1 XML_SetBillionLaughsAttackProtectionMaximumAmplification( parser, attackMaximumAmplification); #endif } if (attackThresholdGiven) { -#ifdef XML_DTD +#if XML_GE == 1 XML_SetBillionLaughsAttackProtectionActivationThreshold( parser, attackThresholdBytes); #else @@ -1133,6 +1182,16 @@ tmain(int argc, XML_Char **argv) { #endif } + if (disableDeferral) { + const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE); + if (! success) { + // This prevents tperror(..) from reporting misleading "[..]: Success" + errno = EINVAL; + tperror(T("Failed to disable reparse deferral")); + exit(XMLWF_EXIT_INTERNAL_ERROR); + } + } + if (requireStandalone) XML_SetNotStandaloneHandler(parser, notStandalone); XML_SetParamEntityParsing(parser, paramEntityParsing); |