aboutsummaryrefslogtreecommitdiff
path: root/contrib/expat/lib/xmltok.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/expat/lib/xmltok.c')
-rw-r--r--contrib/expat/lib/xmltok.c43
1 files changed, 17 insertions, 26 deletions
diff --git a/contrib/expat/lib/xmltok.c b/contrib/expat/lib/xmltok.c
index 502ca1adc33b..29a66d72ceea 100644
--- a/contrib/expat/lib/xmltok.c
+++ b/contrib/expat/lib/xmltok.c
@@ -11,8 +11,8 @@
Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
- Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
- Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
+ Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
Copyright (c) 2016 Don Lewis <truckman@apache.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
@@ -20,7 +20,10 @@
Copyright (c) 2017 Benbuck Nason <bnason@netflix.com>
Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
- Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
+ Copyright (c) 2021 Donghee Na <donghee.na@python.org>
+ Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
+ Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
+ Copyright (c) 2023 Hanno Böck <hanno@gentoo.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -43,7 +46,7 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <expat_config.h>
+#include "expat_config.h"
#include <stddef.h>
#include <string.h> /* memcpy */
@@ -75,7 +78,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -98,13 +101,8 @@
+ ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))
-#define UTF8_GET_NAMING(pages, p, n) \
- ((n) == 2 \
- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
-
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
- of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
+ of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
Implementation details:
@@ -229,7 +227,7 @@ struct normal_encoding {
/* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
-static int FASTCALL checkCharRefNumber(int);
+static int FASTCALL checkCharRefNumber(int result);
#include "xmltok_impl.h"
#include "ascii.h"
@@ -247,7 +245,7 @@ static int FASTCALL checkCharRefNumber(int);
#endif
#define SB_BYTE_TYPE(enc, p) \
- (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
+ (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
@@ -301,7 +299,7 @@ sb_charMatches(const ENCODING *enc, const char *p, int c) {
}
#else
/* c is an ASCII character */
-# define CHAR_MATCHES(enc, p, c) (*(p) == c)
+# define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
#define PREFIX(ident) normal_##ident
@@ -411,7 +409,7 @@ utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short *to = *toP;
const char *from = *fromP;
while (from < fromLim && to < toLim) {
- switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
+ switch (SB_BYTE_TYPE(enc, from)) {
case BT_LEAD2:
if (fromLim - from < 2) {
res = XML_CONVERT_INPUT_INCOMPLETE;
@@ -719,33 +717,28 @@ unicode_byte_type(char hi, char lo) {
return res; \
}
-#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
-#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
#define LITTLE2_BYTE_TYPE(enc, p) \
- ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
- : unicode_byte_type((p)[1], (p)[0]))
+ ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
-#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c)
+#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \
@@ -876,11 +869,9 @@ static const struct normal_encoding internal_little2_encoding
#endif
#define BIG2_BYTE_TYPE(enc, p) \
- ((p)[0] == 0 \
- ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
- : unicode_byte_type((p)[0], (p)[1]))
+ ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
-#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c)
+#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \