aboutsummaryrefslogtreecommitdiff
path: root/src/cbor/internal/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cbor/internal/unicode.c')
-rw-r--r--src/cbor/internal/unicode.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/src/cbor/internal/unicode.c b/src/cbor/internal/unicode.c
new file mode 100644
index 000000000000..98b49728989e
--- /dev/null
+++ b/src/cbor/internal/unicode.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
+ *
+ * libcbor is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include "unicode.h"
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+static const uint8_t utf8d[] = { /* Lookup table read by _cbor_unicode_decode.
+ * Entries 0..255 are indexed by the input byte and give that byte's type;
+ * the row comments 00..1f through f0..ff name the byte range each row covers.
+ * Entries from index 256 onward are read as utf8d[256 + state * 16 + type]
+ * and give the next decoder state, 16 entries per state; the row comments
+ * s0..s8 name the states those rows belong to. State 0 is UTF8_ACCEPT and
+ * state 1 is UTF8_REJECT. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */
+ 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */
+ 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
+ 0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */
+ 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
+ 0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */
+ 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4,
+ 0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */
+ 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */
+ 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */
+ 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1,
+ 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */
+};
+
+/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
+ * <bjoern@hoehrmann.de> */
+/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
+/*
+ * Advances the decoder by one input byte.
+ *
+ * `*state` is the decoder state carried between calls and `*codep` is the
+ * value accumulated so far. Both are updated in place; the new state is also
+ * returned. `byte` indexes the first 256 entries of utf8d directly and
+ * `*state` selects a 16-entry row of its second part, so callers must pass a
+ * byte value (0..255) and a state previously produced by this function (or
+ * UTF8_ACCEPT to start).
+ */
+uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+  const uint32_t byte_type = utf8d[byte];
+
+  if (*state == UTF8_ACCEPT) {
+    /* Starting a new sequence: keep only the low bits selected by the type. */
+    *codep = (0xff >> byte_type) & byte;
+  } else {
+    /* Mid-sequence: append the low six bits of this byte. */
+    *codep = (*codep << 6) | (byte & 0x3fu);
+  }
+
+  /* The transition rows start at index 256, 16 entries per state. */
+  *state = utf8d[256 + *state * 16 + byte_type];
+  return *state;
+}
+
+/*
+ * Counts the codepoints in the `source_length` bytes at `source` by feeding
+ * each byte to _cbor_unicode_decode.
+ *
+ * On success `*status` is {.location = 0, .status = _CBOR_UNICODE_OK} and the
+ * codepoint count is returned. If the decoder rejects a byte, or the input
+ * ends while a multibyte sequence is still open, `*status` is set to
+ * _CBOR_UNICODE_BADCP with `location` equal to the offset at which scanning
+ * stopped (the rejected byte, or `source_length` for a truncated sequence),
+ * and -1 converted to size_t is returned.
+ */
+size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
+                                     struct _cbor_unicode_status* status) {
+  uint32_t decoder_state = UTF8_ACCEPT;
+  uint32_t scratch = 0; /* decoded value; required by the decoder, unused here */
+  size_t offset = 0;
+  size_t total = 0;
+
+  *status =
+      (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};
+
+  while (offset < source_length) {
+    const uint32_t outcome =
+        _cbor_unicode_decode(&decoder_state, &scratch, source[offset]);
+
+    if (outcome == UTF8_REJECT) {
+      /* Leave `offset` on the offending byte. The decoder returns the state it
+       * stored, so decoder_state is UTF8_REJECT as well and the check below
+       * takes the failure path. */
+      break;
+    }
+    if (outcome == UTF8_ACCEPT) {
+      total++;
+    }
+    offset++;
+  }
+
+  if (decoder_state == UTF8_ACCEPT) {
+    return total;
+  }
+
+  /* Rejected byte or unfinished multibyte codepoint */
+  *status = (struct _cbor_unicode_status){.location = offset,
+                                          .status = _CBOR_UNICODE_BADCP};
+  return (size_t)-1;
+}