diff options
author | Tim J. Robbins <tjr@FreeBSD.org> | 2004-07-09 02:08:07 +0000 |
---|---|---|
committer | Tim J. Robbins <tjr@FreeBSD.org> | 2004-07-09 02:08:07 +0000 |
commit | ca99cfdd14f4fa361788e3a15e1bfdd99e72b58c (patch) | |
tree | 1c3e4abaa437f441d05a1cc08f7e7b91684fbad4 /usr.bin/tr/cset.h | |
parent | 6ec70e64c680d378f84602bfed04545c94f7ca99 (diff) | |
download | src-ca99cfdd14f4fa361788e3a15e1bfdd99e72b58c.tar.gz src-ca99cfdd14f4fa361788e3a15e1bfdd99e72b58c.zip |
Add support for multibyte characters. The challenge here was to use
data structures that scale better with large character sets, instead of
arrays indexed by character value:
- Sets of characters to delete/squeeze are stored in a new "cset" structure,
which is implemented as a splay tree of extents. This structure has the
ability to store character classes (à la wctype(3)), but this is not
currently fully utilized.
- Mappings between characters are stored in a new "cmap" structure, which
is also a splay tree.
- The parser no longer builds arrays containing all the characters in a
particular class; instead, next() determines them on-the-fly using
nextwctype(3).
Notes
Notes:
svn path=/head/; revision=131846
Diffstat (limited to 'usr.bin/tr/cset.h')
-rw-r--r-- | usr.bin/tr/cset.h | 75 |
1 files changed, 75 insertions, 0 deletions
/*-
 * Copyright (c) 2004 Tim J. Robbins.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef CSET_H
#define CSET_H

#include <stdbool.h>
#include <wchar.h>
#include <wctype.h>

/*
 * One node of the binary tree of character extents hanging off
 * cset.cs_root (the introducing commit describes it as a splay tree).
 * NOTE(review): csn_min/csn_max look like the inclusive bounds of the
 * extent -- confirm against cset.c.
 */
struct csnode {
	wchar_t		csn_min;
	wchar_t		csn_max;
	struct csnode	*csn_left;
	struct csnode	*csn_right;
};

/*
 * One entry in the singly linked list (via csc_next) of wctype(3)
 * character classes attached to a set.
 * NOTE(review): the exact meaning of csc_invert and csc_value is defined
 * by cset.c and is not visible from this header.
 */
struct csclass {
	wctype_t	csc_type;
	bool		csc_invert;
	bool		csc_value;
	struct csclass	*csc_next;
};

/*
 * A set of wide characters.
 *
 * cs_cache holds precomputed membership answers for the first
 * CS_CACHE_SIZE character values; cset_in() only consults it while
 * cs_havecache is true.  Everything else is answered by cset_in_hard().
 * NOTE(review): cs_invert presumably complements the whole set -- confirm
 * against cset.c.
 */
struct cset {
#define	CS_CACHE_SIZE	256
	bool		cs_cache[CS_CACHE_SIZE];
	bool		cs_havecache;
	struct csclass	*cs_classes;
	struct csnode	*cs_root;
	bool		cs_invert;
};

/*
 * Implemented in cset.c.  NOTE(review): return-value conventions and the
 * meaning of the trailing bool of cset_addclass() are not visible here;
 * check the implementation before relying on them.
 */
bool			cset_addclass(struct cset *, wctype_t, bool);
struct cset *	cset_alloc(void);
bool			cset_add(struct cset *, wchar_t);
void			cset_invert(struct cset *);
bool			cset_in_hard(struct cset *, wchar_t);
void			cset_cache(struct cset *);

/*
 * Test whether character ch is a member of set cs.
 *
 * Fast path: when the set has a valid cache and ch falls inside it, the
 * answer is read straight from cs_cache.  Otherwise the lookup is
 * delegated to cset_in_hard().
 *
 * wchar_t is a signed type on many platforms, so the range check is done
 * on an unsigned value: a negative ch becomes a huge number, fails the
 * comparison and takes the slow path instead of indexing cs_cache with a
 * negative subscript (an out-of-bounds read).
 */
static __inline bool
cset_in(struct cset *cs, wchar_t ch)
{

	if (cs->cs_havecache && (unsigned long)ch < CS_CACHE_SIZE)
		return (cs->cs_cache[ch]);
	return (cset_in_hard(cs, ch));
}

#endif /* CSET_H */