aboutsummaryrefslogtreecommitdiff
path: root/lib/libc/locale/euc.4
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/locale/euc.4')
-rw-r--r--lib/libc/locale/euc.4231
1 files changed, 231 insertions, 0 deletions
diff --git a/lib/libc/locale/euc.4 b/lib/libc/locale/euc.4
new file mode 100644
index 000000000000..966b896b657f
--- /dev/null
+++ b/lib/libc/locale/euc.4
@@ -0,0 +1,231 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)euc.4 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt EUC 4
+.Os
+.Sh NAME
+.Nm EUC
+.Nd EUC encoding of runes
+.Sh SYNOPSIS
+\fBENCODING "EUC"\fP
+.br
+\fBVARIABLE \fP\fIlen1 mask1 len2 mask2 len3 mask3 len4 mask4 mask\fP
+.Sh DESCRIPTION
+The
+.Nm EUC
+encoding is provided for compatibility with
+.Ux
+based systems.
+See
+.Xr mklocale 1
+for a complete description of the
+.Ev LC_CTYPE
+source file format.
+.Pp
+.Nm EUC
+implements a system of 4 multibyte codesets.
+A multibyte character in the first codeset consists of
+.Ar len1
+bytes starting with a byte in the range of 0x00 to 0x7f.
+To allow use of ASCII,
+.Ar len1
+is always 1.
+A multibyte character in the second codeset consists of
+.Ar len2
+bytes starting with a byte in the range of 0x80-0xff excluding 0x8e and 0x8f.
+A multibyte character in the third codeset consists of
+.Ar len3
+bytes starting with the byte 0x8e.
+A multibyte character in the fourth codeset consists of
+.Ar len4
+bytes starting with the byte 0x8f.
+.Pp
+The
+.Ev rune_t
+encoding of
+.Nm EUC
+multibyte characters is dependent on the
+.Ar len
+and
+.Ar mask
+arguments.
+First, the bytes are moved into a
+.Ev rune_t
+as follows:
+.Bd -literal
+byte0 << ((\fIlen\fPN-1) * 8) | byte1 << ((\fIlen\fPN-2) * 8) | ... | byte\fIlen\fPN-1
+.Ed
+.sp
+The result is then ANDed with
+.Ar ~mask
+and ORed with
+.Ar mask\fPN.
+Codesets 2 and 3 are special in that the leading byte (0x8e or 0x8f) is
+first removed and the
+.Ar len\fPN
+argument is reduced by 1.
+.sp
+For example, the Japanese locale has the following
+.Ev VARIABLE
+line:
+.Bd -literal
+VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080
+.Ed
+.sp
+Codeset 1 consists of the values 0x0000 - 0x007f.
+.sp
+Codeset 2 consists of the values who have the bits 0x8080 set.
+.sp
+Codeset 3 consists of the values 0x0080 - 0x00ff.
+.sp
+Codeset 4 consists of the values 0x8000 - 0xff7f excluding the values
+which have the 0x0080 bit set.
+.sp
+Notice that the global
+.Ar mask
+is set to 0x8080, this implies that from those 2 bits the codeset can
+be determined.
+.Sh "EXAMPLE - Japanese Locale"
+This is a complete example of an
+.Ev LC_CTYPE
+source file for the Japanese locale
+.Bd -literal
+/*
+ * Japanese LOCALE_CTYPE definitions using EUC of JIS character sets
+ */
+
+ENCODING "EUC"
+
+/* JIS JIS JIS */
+/* X201 X208 X201 */
+/* 00-7f 84-fe */
+
+VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080
+
+/*
+ * Code Set 1
+ */
+ALPHA 'A' - 'Z' 'a' - 'z'
+CONTROL 0x00 - 0x1f 0x7f
+DIGIT '0' - '9'
+GRAPH 0x21 - 0x7e
+LOWER 'a' - 'z'
+PUNCT 0x21 - 0x2f 0x3a - 0x40 0x5b - 0x60 0x7b - 0x7e
+SPACE 0x09 - 0x0d 0x20
+UPPER 'A' - 'Z'
+XDIGIT 'a' - 'f' 'A' - 'F'
+BLANK ' ' '\t'
+PRINT 0x20 - 0x7e
+
+MAPLOWER < 'A' - 'Z' : 'a' > < 'a' - 'z' : 'a' >
+MAPUPPER < 'A' - 'Z' : 'A' > < 'a' - 'z' : 'A' >
+TODIGIT < '0' - '9' : 0 >
+TODIGIT < 'A' - 'F' : 10 > < 'a' - 'f' : 10 >
+
+/*
+ * Code Set 2
+ */
+
+SPACE 0xa1a1
+PHONOGRAM 0xa1bc
+SPECIAL 0xa1a2 - 0xa1fe
+PUNCT 0xa1a2 - 0xa1f8 /* A few too many in here... */
+
+SPECIAL 0xa2a1 - 0xa2ae 0xa2ba - 0xa2c1 0xa2ca - 0xa2d0 0xa2dc - 0xa2ea
+SPECIAL 0xa2f2 - 0xa2f9 0xa2fe
+
+DIGIT 0xa3b0 - 0xa3b9
+UPPER 0xa3c1 - 0xa3da /* Romaji */
+LOWER 0xa3e1 - 0xa3fa /* Romaji */
+MAPLOWER < 0xa3c1 - 0xa3da : 0xa3e1 > /* English */
+MAPLOWER < 0xa3e1 - 0xa3fa : 0xa3e1 > /* English */
+MAPUPPER < 0xa3c1 - 0xa3da : 0xa3c1 >
+MAPUPPER < 0xa3e1 - 0xa3fa : 0xa3c1 >
+
+XDIGIT 0xa3c1 - 0xa3c6 0xa3e1 - 0xa3e6
+
+TODIGIT < 0xa3b0 - 0xa3b9 : 0 >
+TODIGIT < 0xa3c1 - 0xa3c6 : 10 > < 0xa3e1 - 0xa3e6 : 10 >
+
+PHONOGRAM 0xa4a1 - 0xa4f3
+PHONOGRAM 0xa5a1 - 0xa5f6
+
+UPPER 0xa6a1 - 0xa6b8 /* Greek */
+LOWER 0xa6c1 - 0xa6d8 /* Greek */
+MAPLOWER < 0xa6a1 - 0xa6b8 : 0xa6c1 > < 0xa6c1 - 0xa6d8 : 0xa6c1 >
+MAPUPPER < 0xa6a1 - 0xa6b8 : 0xa6a1 > < 0xa6c1 - 0xa6d8 : 0xa6a1 >
+
+UPPER 0xa7a1 - 0xa7c1 /* Cyrillic */
+LOWER 0xa7d1 - 0xa7f1 /* Cyrillic */
+MAPLOWER < 0xa7a1 - 0xa7c1 : 0xa7d1 > < 0xa7d1 - 0xa7f1 : 0xa7d1 >
+MAPUPPER < 0xa7a1 - 0xa7c1 : 0xa7a1 > < 0xa7d1 - 0xa7f1 : 0xa7a1 >
+
+SPECIAL 0xa8a1 - 0xa8c0
+
+IDEOGRAM 0xb0a1 - 0xb0fe 0xb1a1 - 0xb1fe 0xb2a1 - 0xb2fe
+IDEOGRAM 0xb3a1 - 0xb3fe 0xb4a1 - 0xb4fe 0xb5a1 - 0xb5fe
+IDEOGRAM 0xb6a1 - 0xb6fe 0xb7a1 - 0xb7fe 0xb8a1 - 0xb8fe
+IDEOGRAM 0xb9a1 - 0xb9fe 0xbaa1 - 0xbafe 0xbba1 - 0xbbfe
+IDEOGRAM 0xbca1 - 0xbcfe 0xbda1 - 0xbdfe 0xbea1 - 0xbefe
+IDEOGRAM 0xbfa1 - 0xbffe 0xc0a1 - 0xc0fe 0xc1a1 - 0xc1fe
+IDEOGRAM 0xc2a1 - 0xc2fe 0xc3a1 - 0xc3fe 0xc4a1 - 0xc4fe
+IDEOGRAM 0xc5a1 - 0xc5fe 0xc6a1 - 0xc6fe 0xc7a1 - 0xc7fe
+IDEOGRAM 0xc8a1 - 0xc8fe 0xc9a1 - 0xc9fe 0xcaa1 - 0xcafe
+IDEOGRAM 0xcba1 - 0xcbfe 0xcca1 - 0xccfe 0xcda1 - 0xcdfe
+IDEOGRAM 0xcea1 - 0xcefe 0xcfa1 - 0xcfd3 0xd0a1 - 0xd0fe
+IDEOGRAM 0xd1a1 - 0xd1fe 0xd2a1 - 0xd2fe 0xd3a1 - 0xd3fe
+IDEOGRAM 0xd4a1 - 0xd4fe 0xd5a1 - 0xd5fe 0xd6a1 - 0xd6fe
+IDEOGRAM 0xd7a1 - 0xd7fe 0xd8a1 - 0xd8fe 0xd9a1 - 0xd9fe
+IDEOGRAM 0xdaa1 - 0xdafe 0xdba1 - 0xdbfe 0xdca1 - 0xdcfe
+IDEOGRAM 0xdda1 - 0xddfe 0xdea1 - 0xdefe 0xdfa1 - 0xdffe
+IDEOGRAM 0xe0a1 - 0xe0fe 0xe1a1 - 0xe1fe 0xe2a1 - 0xe2fe
+IDEOGRAM 0xe3a1 - 0xe3fe 0xe4a1 - 0xe4fe 0xe5a1 - 0xe5fe
+IDEOGRAM 0xe6a1 - 0xe6fe 0xe7a1 - 0xe7fe 0xe8a1 - 0xe8fe
+IDEOGRAM 0xe9a1 - 0xe9fe 0xeaa1 - 0xeafe 0xeba1 - 0xebfe
+IDEOGRAM 0xeca1 - 0xecfe 0xeda1 - 0xedfe 0xeea1 - 0xeefe
+IDEOGRAM 0xefa1 - 0xeffe 0xf0a1 - 0xf0fe 0xf1a1 - 0xf1fe
+IDEOGRAM 0xf2a1 - 0xf2fe 0xf3a1 - 0xf3fe 0xf4a1 - 0xf4a4
+/*
+ * This is for Code Set 3, half-width kana
+ */
+SPECIAL 0xa1 - 0xdf
+PHONOGRAM 0xa1 - 0xdf
+CONTROL 0x84 - 0x97 0x9b - 0x9f 0xe0 - 0xfe
+.Ed
+.Sh "SEE ALSO"
+.Xr mklocale 1 ,
+.Xr setlocale 3