aboutsummaryrefslogtreecommitdiff
path: root/lib/libc/locale/gb18030.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/locale/gb18030.c')
-rw-r--r--lib/libc/locale/gb18030.c224
1 files changed, 224 insertions, 0 deletions
diff --git a/lib/libc/locale/gb18030.c b/lib/libc/locale/gb18030.c
new file mode 100644
index 000000000000..92143852dd76
--- /dev/null
+++ b/lib/libc/locale/gb18030.c
@@ -0,0 +1,224 @@
+/*-
+ * Copyright (c) 2002-2004 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * PRC National Standard GB 18030-2000 encoding of Chinese text.
+ *
+ * See gb18030(5) for details.
+ */
+
+#include <sys/param.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <runetype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "mblocal.h"
+
+static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+static int _GB18030_mbsinit(const mbstate_t *);
+static size_t _GB18030_wcrtomb(char * __restrict, wchar_t,
+ mbstate_t * __restrict);
+
+typedef struct {
+ int count;
+ u_char bytes[4];
+} _GB18030State;
+
+int
+_GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+
+ l->__mbrtowc = _GB18030_mbrtowc;
+ l->__wcrtomb = _GB18030_wcrtomb;
+ l->__mbsinit = _GB18030_mbsinit;
+ l->runes = rl;
+ l->__mb_cur_max = 4;
+ l->__mb_sb_limit = 128;
+
+ return (0);
+}
+
+static int
+_GB18030_mbsinit(const mbstate_t *ps)
+{
+
+ return (ps == NULL || ((const _GB18030State *)ps)->count == 0);
+}
+
+static size_t
+_GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps)
+{
+ _GB18030State *gs;
+ wchar_t wch;
+ int ch, len, ocount;
+ size_t ncopy;
+
+ gs = (_GB18030State *)ps;
+
+ if (gs->count < 0 || gs->count > sizeof(gs->bytes)) {
+ errno = EINVAL;
+ return ((size_t)-1);
+ }
+
+ if (s == NULL) {
+ s = "";
+ n = 1;
+ pwc = NULL;
+ }
+
+ ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
+ memcpy(gs->bytes + gs->count, s, ncopy);
+ ocount = gs->count;
+ gs->count += ncopy;
+ s = (char *)gs->bytes;
+ n = gs->count;
+
+ if (n == 0)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
+
+ /*
+ * Single byte: [00-7f]
+ * Two byte: [81-fe][40-7e,80-fe]
+ * Four byte: [81-fe][30-39][81-fe][30-39]
+ */
+ ch = (unsigned char)*s++;
+ if (ch <= 0x7f) {
+ len = 1;
+ wch = ch;
+ } else if (ch >= 0x81 && ch <= 0xfe) {
+ wch = ch;
+ if (n < 2)
+ return ((size_t)-2);
+ ch = (unsigned char)*s++;
+ if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) {
+ wch = (wch << 8) | ch;
+ len = 2;
+ } else if (ch >= 0x30 && ch <= 0x39) {
+ /*
+ * Strip high bit off the wide character we will
+ * eventually output so that it is positive when
+ * cast to wint_t on 32-bit twos-complement machines.
+ */
+ wch = ((wch & 0x7f) << 8) | ch;
+ if (n < 3)
+ return ((size_t)-2);
+ ch = (unsigned char)*s++;
+ if (ch < 0x81 || ch > 0xfe)
+ goto ilseq;
+ wch = (wch << 8) | ch;
+ if (n < 4)
+ return ((size_t)-2);
+ ch = (unsigned char)*s++;
+ if (ch < 0x30 || ch > 0x39)
+ goto ilseq;
+ wch = (wch << 8) | ch;
+ len = 4;
+ } else
+ goto ilseq;
+ } else
+ goto ilseq;
+
+ if (pwc != NULL)
+ *pwc = wch;
+ gs->count = 0;
+ return (wch == L'\0' ? 0 : len - ocount);
+ilseq:
+ errno = EILSEQ;
+ return ((size_t)-1);
+}
+
+static size_t
+_GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
+{
+ _GB18030State *gs;
+ size_t len;
+ int c;
+
+ gs = (_GB18030State *)ps;
+
+ if (gs->count != 0) {
+ errno = EINVAL;
+ return ((size_t)-1);
+ }
+
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (1);
+ if ((wc & ~0x7fffffff) != 0)
+ goto ilseq;
+ if (wc & 0x7f000000) {
+ /* Replace high bit that mbrtowc() removed. */
+ wc |= 0x80000000;
+ c = (wc >> 24) & 0xff;
+ if (c < 0x81 || c > 0xfe)
+ goto ilseq;
+ *s++ = c;
+ c = (wc >> 16) & 0xff;
+ if (c < 0x30 || c > 0x39)
+ goto ilseq;
+ *s++ = c;
+ c = (wc >> 8) & 0xff;
+ if (c < 0x81 || c > 0xfe)
+ goto ilseq;
+ *s++ = c;
+ c = wc & 0xff;
+ if (c < 0x30 || c > 0x39)
+ goto ilseq;
+ *s++ = c;
+ len = 4;
+ } else if (wc & 0x00ff0000)
+ goto ilseq;
+ else if (wc & 0x0000ff00) {
+ c = (wc >> 8) & 0xff;
+ if (c < 0x81 || c > 0xfe)
+ goto ilseq;
+ *s++ = c;
+ c = wc & 0xff;
+ if (c < 0x40 || c == 0x7f || c == 0xff)
+ goto ilseq;
+ *s++ = c;
+ len = 2;
+ } else if (wc <= 0x7f) {
+ *s++ = wc;
+ len = 1;
+ } else
+ goto ilseq;
+
+ return (len);
+ilseq:
+ errno = EILSEQ;
+ return ((size_t)-1);
+}