summaryrefslogtreecommitdiff
path: root/src/lib9/utf
diff options
context:
space:
mode:
authorOndřej Surý <ondrej@sury.org>2011-09-13 13:13:40 +0200
committerOndřej Surý <ondrej@sury.org>2011-09-13 13:13:40 +0200
commit5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
treec0650497e988f47be9c6f2324fa692a52dea82e1 /src/lib9/utf
parent80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)
downloadgolang-upstream/60.tar.gz
Imported Upstream version 60upstream/60
Diffstat (limited to 'src/lib9/utf')
-rw-r--r--src/lib9/utf/Makefile32
-rw-r--r--src/lib9/utf/mkrunetype.c732
-rw-r--r--src/lib9/utf/rune.c351
-rw-r--r--src/lib9/utf/runetype.c38
-rw-r--r--src/lib9/utf/runetypebody-5.0.0.c1361
-rw-r--r--src/lib9/utf/runetypebody-5.2.0.c1541
-rw-r--r--src/lib9/utf/runetypebody-6.0.0.c1565
-rw-r--r--src/lib9/utf/utf.h242
-rw-r--r--src/lib9/utf/utfdef.h28
-rw-r--r--src/lib9/utf/utfecpy.c36
-rw-r--r--src/lib9/utf/utflen.c38
-rw-r--r--src/lib9/utf/utfnlen.c41
-rw-r--r--src/lib9/utf/utfrrune.c47
-rw-r--r--src/lib9/utf/utfrune.c46
-rw-r--r--src/lib9/utf/utfutf.c42
15 files changed, 6140 insertions, 0 deletions
diff --git a/src/lib9/utf/Makefile b/src/lib9/utf/Makefile
new file mode 100644
index 000000000..c3b9ec5d0
--- /dev/null
+++ b/src/lib9/utf/Makefile
@@ -0,0 +1,32 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# The library is built by the Makefile in the parent directory.
+# This Makefile only builds mkrunetype.
+
+include ../../Make.inc
+O:=$(HOST_O)
+
+TARG=mkrunetype
+
+OFILES=\
+ mkrunetype.$O\
+
+include ../../Make.ccmd
+
+UnicodeData-%.txt:
+ curl http://www.unicode.org/Public/$*/ucd/UnicodeData.txt >_$@
+ mv _$@ $@
+
+runetypebody-%.c: mkrunetype UnicodeData-%.txt
+ mkrunetype -p UnicodeData-$*.txt >_$@
+ mv _$@ $@
+
+CLEANFILES+=UnicodeData.txt
+
+UNICODE_VERSION=6.0.0
+
+test: mkrunetype UnicodeData-$(UNICODE_VERSION).txt
+ mkrunetype -c UnicodeData-$(UNICODE_VERSION).txt
+
diff --git a/src/lib9/utf/mkrunetype.c b/src/lib9/utf/mkrunetype.c
new file mode 100644
index 000000000..06d52b572
--- /dev/null
+++ b/src/lib9/utf/mkrunetype.c
@@ -0,0 +1,732 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * make is(upper|lower|title|space|alpha)rune and
+ * to(upper|lower|title)rune from a UnicodeData.txt file.
+ * these can be found at unicode.org
+ *
+ * with -c, runs a check of the existing runetype functions vs.
+ * those extracted from UnicodeData.
+ *
+ * with -p, generates tables for pairs of chars, as well as for ranges
+ * and singletons.
+ *
+ * UnicodeData defines 4 fields of interest:
+ * 1) a category
+ * 2) an upper case mapping
+ * 3) a lower case mapping
+ * 4) a title case mapping
+ *
+ * toupper, tolower, and totitle are defined directly from the mapping.
+ *
+ * isalpharune(c) is true iff c is a "letter" category
+ * isupperrune(c) is true iff c is the target of toupperrune,
+ * or is in the uppercase letter category
+ * similarly for islowerrune and istitlerune.
+ * isspacerune is true for space category chars, "C" locale white space chars,
+ * and two additions:
+ * 0085 "next line" control char
+ * feff] "zero-width non-break space"
+ * isdigitrune is true iff c is a numeric-digit category.
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include "utf.h"
+#include "utfdef.h"
+
+enum {
+ /*
+ * fields in the unicode data file
+ */
+ FIELD_CODE,
+ FIELD_NAME,
+ FIELD_CATEGORY,
+ FIELD_COMBINING,
+ FIELD_BIDIR,
+ FIELD_DECOMP,
+ FIELD_DECIMAL_DIG,
+ FIELD_DIG,
+ FIELD_NUMERIC_VAL,
+ FIELD_MIRRORED,
+ FIELD_UNICODE_1_NAME,
+ FIELD_COMMENT,
+ FIELD_UPPER,
+ FIELD_LOWER,
+ FIELD_TITLE,
+ NFIELDS,
+
+ MAX_LINE = 1024,
+
+ TO_OFFSET = 1 << 20,
+
+ NRUNES = 1 << 21,
+};
+
+#define TO_DELTA(xmapped,x) (TO_OFFSET + (xmapped) - (x))
+
+static char myisspace[NRUNES];
+static char myisalpha[NRUNES];
+static char myisdigit[NRUNES];
+static char myisupper[NRUNES];
+static char myislower[NRUNES];
+static char myistitle[NRUNES];
+
+static int mytoupper[NRUNES];
+static int mytolower[NRUNES];
+static int mytotitle[NRUNES];
+
+static void check(void);
+static void mktables(char *src, int usepairs);
+static void fatal(const char *fmt, ...);
+static int mygetfields(char **fields, int nfields, char *str, const char *delim);
+static int getunicodeline(FILE *in, char **fields, char *buf);
+static int getcode(char *s);
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: mktables [-cp] <UnicodeData.txt>\n");
+ exit(1);
+}
+
+void
+main(int argc, char *argv[])
+{
+ FILE *in;
+ char buf[MAX_LINE], buf2[MAX_LINE];
+ char *fields[NFIELDS + 1], *fields2[NFIELDS + 1];
+ char *p;
+ int i, code, last, docheck, usepairs;
+
+ docheck = 0;
+ usepairs = 0;
+ ARGBEGIN{
+ case 'c':
+ docheck = 1;
+ break;
+ case 'p':
+ usepairs = 1;
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ if(argc != 1){
+ usage();
+ }
+
+ in = fopen(argv[0], "r");
+ if(in == NULL){
+ fatal("can't open %s", argv[0]);
+ }
+
+ for(i = 0; i < NRUNES; i++){
+ mytoupper[i] = i;
+ mytolower[i] = i;
+ mytotitle[i] = i;
+ }
+
+ /*
+ * make sure isspace has all of the "C" locale whitespace chars
+ */
+ myisspace['\t'] = 1;
+ myisspace['\n'] = 1;
+ myisspace['\r'] = 1;
+ myisspace['\f'] = 1;
+ myisspace['\v'] = 1;
+
+ /*
+ * a couple of other exceptions
+ */
+ myisspace[0x85] = 1; /* control char, "next line" */
+ myisspace[0xfeff] = 1; /* zero-width non-break space */
+
+ last = -1;
+ while(getunicodeline(in, fields, buf)){
+ code = getcode(fields[FIELD_CODE]);
+ if (code >= NRUNES)
+ fatal("code-point value too big: %x", code);
+ if(code <= last)
+ fatal("bad code sequence: %x then %x", last, code);
+ last = code;
+
+ /*
+ * check for ranges
+ */
+ p = fields[FIELD_CATEGORY];
+ if(strstr(fields[FIELD_NAME], ", First>") != NULL){
+ if(!getunicodeline(in, fields2, buf2))
+ fatal("range start at eof");
+ if (strstr(fields2[FIELD_NAME], ", Last>") == NULL)
+ fatal("range start not followed by range end");
+ last = getcode(fields2[FIELD_CODE]);
+ if(last <= code)
+ fatal("range out of sequence: %x then %x", code, last);
+ if(strcmp(p, fields2[FIELD_CATEGORY]) != 0)
+ fatal("range with mismatched category");
+ }
+
+ /*
+ * set properties and conversions
+ */
+ for (; code <= last; code++){
+ if(p[0] == 'L')
+ myisalpha[code] = 1;
+ if(p[0] == 'Z')
+ myisspace[code] = 1;
+
+ if(strcmp(p, "Lu") == 0)
+ myisupper[code] = 1;
+ if(strcmp(p, "Ll") == 0)
+ myislower[code] = 1;
+
+ if(strcmp(p, "Lt") == 0)
+ myistitle[code] = 1;
+
+ if(strcmp(p, "Nd") == 0)
+ myisdigit[code] = 1;
+
+ /*
+ * when finding conversions, also need to mark
+ * upper/lower case, since some chars, like
+ * "III" (0x2162), aren't defined as letters but have a
+ * lower case mapping ("iii" (0x2172)).
+ */
+ if(fields[FIELD_UPPER][0] != '\0'){
+ mytoupper[code] = getcode(fields[FIELD_UPPER]);
+ }
+ if(fields[FIELD_LOWER][0] != '\0'){
+ mytolower[code] = getcode(fields[FIELD_LOWER]);
+ }
+ if(fields[FIELD_TITLE][0] != '\0'){
+ mytotitle[code] = getcode(fields[FIELD_TITLE]);
+ }
+ }
+ }
+
+ fclose(in);
+
+ /*
+ * check for codes with no totitle mapping but a toupper mapping.
+ * these appear in UnicodeData-2.0.14.txt, but are almost certainly
+ * erroneous.
+ */
+ for(i = 0; i < NRUNES; i++){
+ if(mytotitle[i] == i
+ && mytoupper[i] != i
+ && !myistitle[i])
+ fprintf(stderr, "warning: code=%.4x not istitle, totitle is same, toupper=%.4x\n", i, mytoupper[i]);
+ }
+
+ /*
+ * make sure isupper[c] is true if for some x toupper[x] == c
+ * ditto for islower and istitle
+ */
+ for(i = 0; i < NRUNES; i++) {
+ if(mytoupper[i] != i)
+ myisupper[mytoupper[i]] = 1;
+ if(mytolower[i] != i)
+ myislower[mytolower[i]] = 1;
+ if(mytotitle[i] != i)
+ myistitle[mytotitle[i]] = 1;
+ }
+
+ if(docheck){
+ check();
+ }else{
+ mktables(argv[0], usepairs);
+ }
+ exit(0);
+}
+
+/*
+ * generate a properties array for ranges, clearing those cases covered.
+ * if force, generate one-entry ranges for singletons.
+ */
+static int
+mkisrange(const char* label, char* prop, int force)
+{
+ int start, stop, some;
+
+ /*
+ * first, the ranges
+ */
+ some = 0;
+ for(start = 0; start < NRUNES; ) {
+ if(!prop[start]){
+ start++;
+ continue;
+ }
+
+ for(stop = start + 1; stop < NRUNES; stop++){
+ if(!prop[stop]){
+ break;
+ }
+ prop[stop] = 0;
+ }
+ if(force || stop != start + 1){
+ if(!some){
+ printf("static Rune __is%sr[] = {\n", label);
+ some = 1;
+ }
+ prop[start] = 0;
+ printf("\t0x%.4x, 0x%.4x,\n", start, stop - 1);
+ }
+
+ start = stop;
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate a mapping array for pairs with a skip between,
+ * clearing those entries covered.
+ */
+static int
+mkispair(const char *label, char *prop)
+{
+ int start, stop, some;
+
+ some = 0;
+ for(start = 0; start + 2 < NRUNES; ) {
+ if(!prop[start]){
+ start++;
+ continue;
+ }
+
+ for(stop = start + 2; stop < NRUNES; stop += 2){
+ if(!prop[stop]){
+ break;
+ }
+ prop[stop] = 0;
+ }
+ if(stop != start + 2){
+ if(!some){
+ printf("static Rune __is%sp[] = {\n", label);
+ some = 1;
+ }
+ prop[start] = 0;
+ printf("\t0x%.4x, 0x%.4x,\n", start, stop - 2);
+ }
+
+ start = stop;
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate a properties array for singletons, clearing those cases covered.
+ */
+static int
+mkissingle(const char *label, char *prop)
+{
+ int start, some;
+
+ some = 0;
+ for(start = 0; start < NRUNES; start++) {
+ if(!prop[start]){
+ continue;
+ }
+
+ if(!some){
+ printf("static Rune __is%ss[] = {\n", label);
+ some = 1;
+ }
+ prop[start] = 0;
+ printf("\t0x%.4x,\n", start);
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate tables and a function for is<label>rune
+ */
+static void
+mkis(const char* label, char* prop, int usepairs)
+{
+ int isr, isp, iss;
+
+ isr = mkisrange(label, prop, 0);
+ isp = 0;
+ if(usepairs)
+ isp = mkispair(label, prop);
+ iss = mkissingle(label, prop);
+
+ printf(
+ "int\n"
+ "is%srune(Rune c)\n"
+ "{\n"
+ " Rune *p;\n"
+ "\n",
+ label);
+
+ if(isr)
+ printf(
+ " p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);\n"
+ " if(p && c >= p[0] && c <= p[1])\n"
+ " return 1;\n",
+ label, label);
+
+ if(isp)
+ printf(
+ " p = rbsearch(c, __is%sp, nelem(__is%sp)/2, 2);\n"
+ " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+ " return 1;\n",
+ label, label);
+
+ if(iss)
+ printf(
+ " p = rbsearch(c, __is%ss, nelem(__is%ss), 1);\n"
+ " if(p && c == p[0])\n"
+ " return 1;\n",
+ label, label);
+
+
+ printf(
+ " return 0;\n"
+ "}\n"
+ "\n"
+ );
+}
+
+/*
+ * generate a mapping array for ranges, clearing those entries covered.
+ * if force, generate one-entry ranges for singletons.
+ */
+static int
+mktorange(const char* label, int* map, int force)
+{
+ int start, stop, delta, some;
+
+ some = 0;
+ for(start = 0; start < NRUNES; ) {
+ if(map[start] == start){
+ start++;
+ continue;
+ }
+
+ delta = TO_DELTA(map[start], start);
+ if(delta != (Rune)delta)
+ fatal("bad map delta %d", delta);
+ for(stop = start + 1; stop < NRUNES; stop++){
+ if(TO_DELTA(map[stop], stop) != delta){
+ break;
+ }
+ map[stop] = stop;
+ }
+ if(stop != start + 1){
+ if(!some){
+ printf("static Rune __to%sr[] = {\n", label);
+ some = 1;
+ }
+ map[start] = start;
+ printf("\t0x%.4x, 0x%.4x, %d,\n", start, stop - 1, delta);
+ }
+
+ start = stop;
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate a mapping array for pairs with a skip between,
+ * clearing those entries covered.
+ */
+static int
+mktopair(const char* label, int* map)
+{
+ int start, stop, delta, some;
+
+ some = 0;
+ for(start = 0; start + 2 < NRUNES; ) {
+ if(map[start] == start){
+ start++;
+ continue;
+ }
+
+ delta = TO_DELTA(map[start], start);
+ if(delta != (Rune)delta)
+ fatal("bad map delta %d", delta);
+ for(stop = start + 2; stop < NRUNES; stop += 2){
+ if(TO_DELTA(map[stop], stop) != delta){
+ break;
+ }
+ map[stop] = stop;
+ }
+ if(stop != start + 2){
+ if(!some){
+ printf("static Rune __to%sp[] = {\n", label);
+ some = 1;
+ }
+ map[start] = start;
+ printf("\t0x%.4x, 0x%.4x, %d,\n", start, stop - 2, delta);
+ }
+
+ start = stop;
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate a mapping array for singletons, clearing those entries covered.
+ */
+static int
+mktosingle(const char* label, int* map)
+{
+ int start, delta, some;
+
+ some = 0;
+ for(start = 0; start < NRUNES; start++) {
+ if(map[start] == start){
+ continue;
+ }
+
+ delta = TO_DELTA(map[start], start);
+ if(delta != (Rune)delta)
+ fatal("bad map delta %d", delta);
+ if(!some){
+ printf("static Rune __to%ss[] = {\n", label);
+ some = 1;
+ }
+ map[start] = start;
+ printf("\t0x%.4x, %d,\n", start, delta);
+ }
+ if(some)
+ printf("};\n\n");
+ return some;
+}
+
+/*
+ * generate tables and a function for to<label>rune
+ */
+static void
+mkto(const char* label, int* map, int usepairs)
+{
+ int tor, top, tos;
+
+ tor = mktorange(label, map, 0);
+ top = 0;
+ if(usepairs)
+ top = mktopair(label, map);
+ tos = mktosingle(label, map);
+
+ printf(
+ "Rune\n"
+ "to%srune(Rune c)\n"
+ "{\n"
+ " Rune *p;\n"
+ "\n",
+ label);
+
+ if(tor)
+ printf(
+ " p = rbsearch(c, __to%sr, nelem(__to%sr)/3, 3);\n"
+ " if(p && c >= p[0] && c <= p[1])\n"
+ " return c + p[2] - %d;\n",
+ label, label, TO_OFFSET);
+
+ if(top)
+ printf(
+ " p = rbsearch(c, __to%sp, nelem(__to%sp)/3, 3);\n"
+ " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+ " return c + p[2] - %d;\n",
+ label, label, TO_OFFSET);
+
+ if(tos)
+ printf(
+ " p = rbsearch(c, __to%ss, nelem(__to%ss)/2, 2);\n"
+ " if(p && c == p[0])\n"
+ " return c + p[1] - %d;\n",
+ label, label, TO_OFFSET);
+
+
+ printf(
+ " return c;\n"
+ "}\n"
+ "\n"
+ );
+}
+
+// Make only range tables and a function for is<label>rune.
+static void
+mkisronly(const char* label, char* prop)
+{
+ mkisrange(label, prop, 1);
+ printf(
+ "int\n"
+ "is%srune(Rune c)\n"
+ "{\n"
+ " Rune *p;\n"
+ "\n"
+ " p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);\n"
+ " if(p && c >= p[0] && c <= p[1])\n"
+ " return 1;\n"
+ " return 0;\n"
+ "}\n"
+ "\n",
+ label, label, label);
+}
+
+/*
+ * generate the body of runetype.
+ * assumes there is a function Rune* rbsearch(Rune c, Rune *t, int n, int ne);
+ */
+static void
+mktables(char *src, int usepairs)
+{
+ printf("/* generated automatically by mkrunetype.c from %s */\n\n", src);
+
+ /*
+ * we special case the space and digit tables, since they are assumed
+ * to be small with several ranges.
+ */
+ mkisronly("space", myisspace);
+ mkisronly("digit", myisdigit);
+
+ mkis("alpha", myisalpha, 0);
+ mkis("upper", myisupper, usepairs);
+ mkis("lower", myislower, usepairs);
+ mkis("title", myistitle, usepairs);
+
+ mkto("upper", mytoupper, usepairs);
+ mkto("lower", mytolower, usepairs);
+ mkto("title", mytotitle, usepairs);
+}
+
+/*
+ * find differences between the newly generated tables and current runetypes.
+ */
+static void
+check(void)
+{
+ int i;
+
+ for(i = 0; i < NRUNES; i++){
+ if(isdigitrune(i) != myisdigit[i])
+ fprintf(stderr, "isdigit diff at %x: runetype=%x, unicode=%x\n",
+ i, isdigitrune(i), myisdigit[i]);
+
+ if(isspacerune(i) != myisspace[i])
+ fprintf(stderr, "isspace diff at %x: runetype=%x, unicode=%x\n",
+ i, isspacerune(i), myisspace[i]);
+
+ if(isupperrune(i) != myisupper[i])
+ fprintf(stderr, "isupper diff at %x: runetype=%x, unicode=%x\n",
+ i, isupperrune(i), myisupper[i]);
+
+ if(islowerrune(i) != myislower[i])
+ fprintf(stderr, "islower diff at %x: runetype=%x, unicode=%x\n",
+ i, islowerrune(i), myislower[i]);
+
+ if(isalpharune(i) != myisalpha[i])
+ fprintf(stderr, "isalpha diff at %x: runetype=%x, unicode=%x\n",
+ i, isalpharune(i), myisalpha[i]);
+
+ if(toupperrune(i) != mytoupper[i])
+ fprintf(stderr, "toupper diff at %x: runetype=%x, unicode=%x\n",
+ i, toupperrune(i), mytoupper[i]);
+
+ if(tolowerrune(i) != mytolower[i])
+ fprintf(stderr, "tolower diff at %x: runetype=%x, unicode=%x\n",
+ i, tolowerrune(i), mytolower[i]);
+
+ if(istitlerune(i) != myistitle[i])
+ fprintf(stderr, "istitle diff at %x: runetype=%x, unicode=%x\n",
+ i, istitlerune(i), myistitle[i]);
+
+ if(totitlerune(i) != mytotitle[i])
+ fprintf(stderr, "totitle diff at %x: runetype=%x, unicode=%x\n",
+ i, totitlerune(i), mytotitle[i]);
+
+
+ }
+}
+
+static int
+mygetfields(char **fields, int nfields, char *str, const char *delim)
+{
+ int nf;
+
+ fields[0] = str;
+ nf = 1;
+ if(nf >= nfields)
+ return nf;
+
+ for(; *str; str++){
+ if(strchr(delim, *str) != NULL){
+ *str = '\0';
+ fields[nf++] = str + 1;
+ if(nf >= nfields)
+ break;
+ }
+ }
+ return nf;
+}
+
+static int
+getunicodeline(FILE *in, char **fields, char *buf)
+{
+ char *p;
+
+ if(fgets(buf, MAX_LINE, in) == NULL)
+ return 0;
+
+ p = strchr(buf, '\n');
+ if (p == NULL)
+ fatal("line too long");
+ *p = '\0';
+
+ if (mygetfields(fields, NFIELDS + 1, buf, ";") != NFIELDS)
+ fatal("bad number of fields");
+
+ return 1;
+}
+
+static int
+getcode(char *s)
+{
+ int i, code;
+
+ code = 0;
+ i = 0;
+ /* Parse a hex number */
+ while(s[i]) {
+ code <<= 4;
+ if(s[i] >= '0' && s[i] <= '9')
+ code += s[i] - '0';
+ else if(s[i] >= 'A' && s[i] <= 'F')
+ code += s[i] - 'A' + 10;
+ else
+ fatal("bad code char '%c'", s[i]);
+ i++;
+ }
+ return code;
+}
+
+static void
+fatal(const char *fmt, ...)
+{
+ va_list arg;
+
+ fprintf(stderr, "%s: fatal error: ", argv0);
+ va_start(arg, fmt);
+ vfprintf(stderr, fmt, arg);
+ va_end(arg);
+ fprintf(stderr, "\n");
+
+ exit(1);
+}
diff --git a/src/lib9/utf/rune.c b/src/lib9/utf/rune.c
new file mode 100644
index 000000000..cf98bab15
--- /dev/null
+++ b/src/lib9/utf/rune.c
@@ -0,0 +1,351 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Portions Copyright (c) 2009 The Go Authors. All rights reserved.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+enum
+{
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
+
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1,
+ /* 0001 1111 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
+
+ Bad = Runeerror,
+};
+
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune
+ * that works on strings that are not necessarily null-terminated.
+ *
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed not to attempt to access "length"
+ * past the incoming pointer. This is to avoid
+ * possible access violations. If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole Rune
+ * we'd need to read past str+length) then we'll set the Rune
+ * to Bad and return 0.
+ *
+ * Note that if we have decoding problems for other
+ * reasons, we return 1 instead of 0.
+ */
+int
+charntorune(Rune *rune, const char *str, int length)
+{
+ int c, c1, c2, c3;
+ long l;
+
+ /* When we're not allowed to read anything */
+ if(length <= 0) {
+ goto badlen;
+ }
+
+ /*
+ * one character sequence (7-bit value)
+ * 00000-0007F => T1
+ */
+ c = *(uchar*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ // If we can't read more than one character we must stop
+ if(length <= 1) {
+ goto badlen;
+ }
+
+ /*
+ * two character sequence (11-bit value)
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(uchar*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ // If we can't read more than two characters we must stop
+ if(length <= 2) {
+ goto badlen;
+ }
+
+ /*
+ * three character sequence (16-bit value)
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(uchar*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ if (length <= 3)
+ goto badlen;
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if (c3 & Testx)
+ goto bad;
+ if (c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if (l <= Rune3)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+
+ // Support for 5-byte or longer UTF-8 would go here, but
+ // since we don't have that, we'll just fall through to bad.
+
+ /*
+ * bad decoding
+ */
+bad:
+ *rune = Bad;
+ return 1;
+badlen:
+ *rune = Bad;
+ return 0;
+
+}
+
+
+/*
+ * This is the older "unsafe" version, which works fine on
+ * null-terminated strings.
+ */
+int
+chartorune(Rune *rune, const char *str)
+{
+ int c, c1, c2, c3;
+ long l;
+
+ /*
+ * one character sequence
+ * 00000-0007F => T1
+ */
+ c = *(uchar*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(uchar*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(uchar*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if (c3 & Testx)
+ goto bad;
+ if (c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if (l <= Rune3)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+
+ /*
+ * Support for 5-byte or longer UTF-8 would go here, but
+ * since we don't have that, we'll just fall through to bad.
+ */
+
+ /*
+ * bad decoding
+ */
+bad:
+ *rune = Bad;
+ return 1;
+}
+
+int
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+ *consumed = charntorune(rune, str, length);
+ return *rune != Runeerror || *consumed == 3;
+}
+
+int
+runetochar(char *str, const Rune *rune)
+{
+ /* Runes are signed, so convert to unsigned for range check. */
+ unsigned long c;
+
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
+ c = *rune;
+ if(c <= Rune1) {
+ str[0] = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ if(c <= Rune2) {
+ str[0] = T2 | (c >> 1*Bitx);
+ str[1] = Tx | (c & Maskx);
+ return 2;
+ }
+
+ /*
+ * If the Rune is out of range, convert it to the error rune.
+ * Do this test here because the error rune encodes to three bytes.
+ * Doing it earlier would duplicate work, since an out of range
+ * Rune wouldn't have fit in one or two bytes.
+ */
+ if (c > Runemax)
+ c = Runeerror;
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ if (c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
+}
+
+int
+runelen(Rune rune)
+{
+ char str[10];
+
+ return runetochar(str, &rune);
+}
+
+int
+runenlen(const Rune *r, int nrune)
+{
+ int nb, c;
+
+ nb = 0;
+ while(nrune--) {
+ c = *r++;
+ if (c <= Rune1)
+ nb++;
+ else if (c <= Rune2)
+ nb += 2;
+ else if (c <= Rune3)
+ nb += 3;
+ else /* assert(c <= Rune4) */
+ nb += 4;
+ }
+ return nb;
+}
+
+int
+fullrune(const char *str, int n)
+{
+ if (n > 0) {
+ int c = *(uchar*)str;
+ if (c < Tx)
+ return 1;
+ if (n > 1) {
+ if (c < T3)
+ return 1;
+ if (n > 2) {
+ if (c < T4 || n > 3)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/src/lib9/utf/runetype.c b/src/lib9/utf/runetype.c
new file mode 100644
index 000000000..27867430b
--- /dev/null
+++ b/src/lib9/utf/runetype.c
@@ -0,0 +1,38 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include "utf.h"
+#include "utfdef.h"
+
+static
+Rune*
+rbsearch(Rune c, Rune *t, int n, int ne)
+{
+ Rune *p;
+ int m;
+
+ while(n > 1) {
+ m = n >> 1;
+ p = t + m*ne;
+ if(c >= p[0]) {
+ t = p;
+ n = n-m;
+ } else
+ n = m;
+ }
+ if(n && c >= t[0])
+ return t;
+ return 0;
+}
+
+#include "runetypebody-6.0.0.c"
diff --git a/src/lib9/utf/runetypebody-5.0.0.c b/src/lib9/utf/runetypebody-5.0.0.c
new file mode 100644
index 000000000..67a645d60
--- /dev/null
+++ b/src/lib9/utf/runetypebody-5.0.0.c
@@ -0,0 +1,1361 @@
+/* generated automatically by mkrunetype.c from UnicodeData-5.0.0.txt */
+
+static Rune __isspacer[] = {
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+ 0xfeff, 0xfeff,
+};
+
+int
+isspacerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isdigitr[] = {
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x1b50, 0x1b59,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff,
+};
+
+int
+isdigitrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isalphar[] = {
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x037a, 0x037d,
+ 0x0388, 0x038a,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0513,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0640, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x0904, 0x0939,
+ 0x0958, 0x0961,
+ 0x097b, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b6, 0x09b9,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e87, 0x0e88,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2090, 0x2094,
+ 0x210a, 0x2113,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2c6c,
+ 0x2c74, 0x2c77,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa717, 0xa71a,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x10900, 0x10915,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x12000, 0x1236e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+};
+
+static Rune __isalphas[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x02ee,
+ 0x0386,
+ 0x038c,
+ 0x0559,
+ 0x06d5,
+ 0x06ff,
+ 0x0710,
+ 0x07b1,
+ 0x07fa,
+ 0x093d,
+ 0x0950,
+ 0x09b2,
+ 0x09bd,
+ 0x09ce,
+ 0x0a5e,
+ 0x0abd,
+ 0x0ad0,
+ 0x0b3d,
+ 0x0b71,
+ 0x0b83,
+ 0x0b9c,
+ 0x0cbd,
+ 0x0cde,
+ 0x0dbd,
+ 0x0e84,
+ 0x0e8a,
+ 0x0e8d,
+ 0x0ea5,
+ 0x0ea7,
+ 0x0ebd,
+ 0x0ec6,
+ 0x0f00,
+ 0x10fc,
+ 0x1258,
+ 0x12c0,
+ 0x17d7,
+ 0x17dc,
+ 0x1f59,
+ 0x1f5b,
+ 0x1f5d,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2124,
+ 0x2126,
+ 0x2128,
+ 0x214e,
+ 0x2d6f,
+ 0xfb1d,
+ 0xfb3e,
+ 0x10808,
+ 0x1083c,
+ 0x1083f,
+ 0x10a00,
+ 0x1d4a2,
+ 0x1d4bb,
+ 0x1d546,
+};
+
+int
+isalpharune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isalphas, nelem(__isalphas), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __isupperr[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+};
+
+static Rune __isupperp[] = {
+ 0x0100, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cd, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0512,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1ef8,
+ 0x1f59, 0x1f5f,
+ 0x2124, 0x2128,
+ 0x2c67, 0x2c6b,
+ 0x2c80, 0x2ce2,
+};
+
+static Rune __isuppers[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c4,
+ 0x01c7,
+ 0x01ca,
+ 0x01f1,
+ 0x01f4,
+ 0x0241,
+ 0x0386,
+ 0x038c,
+ 0x03f4,
+ 0x03f7,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2145,
+ 0x2183,
+ 0x2c60,
+ 0x2c75,
+ 0x1d49c,
+ 0x1d4a2,
+ 0x1d546,
+ 0x1d7ca,
+};
+
+int
+isupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __isuppers, nelem(__isuppers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __islowerr[] = {
+ 0x0061, 0x007a,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0137, 0x0138,
+ 0x0148, 0x0149,
+ 0x017e, 0x0180,
+ 0x018c, 0x018d,
+ 0x0199, 0x019b,
+ 0x01aa, 0x01ab,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01dc, 0x01dd,
+ 0x01ef, 0x01f0,
+ 0x0233, 0x0239,
+ 0x023f, 0x0240,
+ 0x024f, 0x0293,
+ 0x0295, 0x02af,
+ 0x037b, 0x037d,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03ef, 0x03f3,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x04ce, 0x04cf,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e95, 0x1e9b,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210e, 0x210f,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2170, 0x217f,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c65, 0x2c66,
+ 0x2c76, 0x2c77,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+};
+
+static Rune __islowerp[] = {
+ 0x0101, 0x0135,
+ 0x013a, 0x0146,
+ 0x014b, 0x0177,
+ 0x017a, 0x017c,
+ 0x0183, 0x0185,
+ 0x01a1, 0x01a5,
+ 0x01b4, 0x01b6,
+ 0x01cc, 0x01da,
+ 0x01df, 0x01ed,
+ 0x01f3, 0x01f5,
+ 0x01f9, 0x0231,
+ 0x0247, 0x024d,
+ 0x03d9, 0x03ed,
+ 0x0461, 0x0481,
+ 0x048b, 0x04bf,
+ 0x04c2, 0x04cc,
+ 0x04d1, 0x0513,
+ 0x1e01, 0x1e93,
+ 0x1ea1, 0x1ef9,
+ 0x2c68, 0x2c6c,
+ 0x2c81, 0x2ce1,
+};
+
+static Rune __islowers[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x0188,
+ 0x0192,
+ 0x0195,
+ 0x019e,
+ 0x01a8,
+ 0x01ad,
+ 0x01b0,
+ 0x01c6,
+ 0x01c9,
+ 0x023c,
+ 0x0242,
+ 0x0390,
+ 0x03f5,
+ 0x03f8,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x210a,
+ 0x2113,
+ 0x212f,
+ 0x2134,
+ 0x2139,
+ 0x214e,
+ 0x2184,
+ 0x2c61,
+ 0x2c74,
+ 0x1d4bb,
+ 0x1d7cb,
+};
+
+int
+islowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __islowers, nelem(__islowers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __istitler[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+};
+
+static Rune __istitlep[] = {
+ 0x0100, 0x012e,
+ 0x0132, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cb, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01f2, 0x01f4,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0512,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1ef8,
+ 0x1f59, 0x1f5f,
+ 0x2c67, 0x2c6b,
+ 0x2c80, 0x2ce2,
+};
+
+static Rune __istitles[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c5,
+ 0x01c8,
+ 0x0241,
+ 0x0386,
+ 0x038c,
+ 0x03f7,
+ 0x2132,
+ 0x2183,
+ 0x2c60,
+ 0x2c75,
+};
+
+int
+istitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __istitler, nelem(__istitler)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __istitles, nelem(__istitles), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __toupperr[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __toupperp[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01ce, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0513, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1ef9, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+};
+
+static Rune __touppers[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c5, 1048575,
+ 0x01c6, 1048574,
+ 0x01c8, 1048575,
+ 0x01c9, 1048574,
+ 0x01cb, 1048575,
+ 0x01cc, 1048574,
+ 0x01dd, 1048497,
+ 0x01f2, 1048575,
+ 0x01f3, 1048574,
+ 0x01f5, 1048575,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c76, 1048575,
+};
+
+Rune
+toupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __touppers, nelem(__touppers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __tolowerr[] = {
+ 0x0041, 0x005a, 1048608,
+ 0x00c0, 0x00d6, 1048608,
+ 0x00d8, 0x00de, 1048608,
+ 0x0189, 0x018a, 1048781,
+ 0x01b1, 0x01b2, 1048793,
+ 0x0388, 0x038a, 1048613,
+ 0x038e, 0x038f, 1048639,
+ 0x0391, 0x03a1, 1048608,
+ 0x03a3, 0x03ab, 1048608,
+ 0x03fd, 0x03ff, 1048446,
+ 0x0400, 0x040f, 1048656,
+ 0x0410, 0x042f, 1048608,
+ 0x0531, 0x0556, 1048624,
+ 0x10a0, 0x10c5, 1055840,
+ 0x1f08, 0x1f0f, 1048568,
+ 0x1f18, 0x1f1d, 1048568,
+ 0x1f28, 0x1f2f, 1048568,
+ 0x1f38, 0x1f3f, 1048568,
+ 0x1f48, 0x1f4d, 1048568,
+ 0x1f68, 0x1f6f, 1048568,
+ 0x1f88, 0x1f8f, 1048568,
+ 0x1f98, 0x1f9f, 1048568,
+ 0x1fa8, 0x1faf, 1048568,
+ 0x1fb8, 0x1fb9, 1048568,
+ 0x1fba, 0x1fbb, 1048502,
+ 0x1fc8, 0x1fcb, 1048490,
+ 0x1fd8, 0x1fd9, 1048568,
+ 0x1fda, 0x1fdb, 1048476,
+ 0x1fe8, 0x1fe9, 1048568,
+ 0x1fea, 0x1feb, 1048464,
+ 0x1ff8, 0x1ff9, 1048448,
+ 0x1ffa, 0x1ffb, 1048450,
+ 0x2160, 0x216f, 1048592,
+ 0x24b6, 0x24cf, 1048602,
+ 0x2c00, 0x2c2e, 1048624,
+ 0xff21, 0xff3a, 1048608,
+ 0x10400, 0x10427, 1048616,
+};
+
+static Rune __tolowerp[] = {
+ 0x0100, 0x012e, 1048577,
+ 0x0132, 0x0136, 1048577,
+ 0x0139, 0x0147, 1048577,
+ 0x014a, 0x0176, 1048577,
+ 0x017b, 0x017d, 1048577,
+ 0x01a2, 0x01a4, 1048577,
+ 0x01b3, 0x01b5, 1048577,
+ 0x01cd, 0x01db, 1048577,
+ 0x01de, 0x01ee, 1048577,
+ 0x01f8, 0x021e, 1048577,
+ 0x0222, 0x0232, 1048577,
+ 0x0248, 0x024e, 1048577,
+ 0x03d8, 0x03ee, 1048577,
+ 0x0460, 0x0480, 1048577,
+ 0x048a, 0x04be, 1048577,
+ 0x04c3, 0x04cd, 1048577,
+ 0x04d0, 0x0512, 1048577,
+ 0x1e00, 0x1e94, 1048577,
+ 0x1ea0, 0x1ef8, 1048577,
+ 0x1f59, 0x1f5f, 1048568,
+ 0x2c67, 0x2c6b, 1048577,
+ 0x2c80, 0x2ce2, 1048577,
+};
+
+static Rune __tolowers[] = {
+ 0x0130, 1048377,
+ 0x0178, 1048455,
+ 0x0179, 1048577,
+ 0x0181, 1048786,
+ 0x0182, 1048577,
+ 0x0184, 1048577,
+ 0x0186, 1048782,
+ 0x0187, 1048577,
+ 0x018b, 1048577,
+ 0x018e, 1048655,
+ 0x018f, 1048778,
+ 0x0190, 1048779,
+ 0x0191, 1048577,
+ 0x0193, 1048781,
+ 0x0194, 1048783,
+ 0x0196, 1048787,
+ 0x0197, 1048785,
+ 0x0198, 1048577,
+ 0x019c, 1048787,
+ 0x019d, 1048789,
+ 0x019f, 1048790,
+ 0x01a0, 1048577,
+ 0x01a6, 1048794,
+ 0x01a7, 1048577,
+ 0x01a9, 1048794,
+ 0x01ac, 1048577,
+ 0x01ae, 1048794,
+ 0x01af, 1048577,
+ 0x01b7, 1048795,
+ 0x01b8, 1048577,
+ 0x01bc, 1048577,
+ 0x01c4, 1048578,
+ 0x01c5, 1048577,
+ 0x01c7, 1048578,
+ 0x01c8, 1048577,
+ 0x01ca, 1048578,
+ 0x01cb, 1048577,
+ 0x01f1, 1048578,
+ 0x01f2, 1048577,
+ 0x01f4, 1048577,
+ 0x01f6, 1048479,
+ 0x01f7, 1048520,
+ 0x0220, 1048446,
+ 0x023a, 1059371,
+ 0x023b, 1048577,
+ 0x023d, 1048413,
+ 0x023e, 1059368,
+ 0x0241, 1048577,
+ 0x0243, 1048381,
+ 0x0244, 1048645,
+ 0x0245, 1048647,
+ 0x0246, 1048577,
+ 0x0386, 1048614,
+ 0x038c, 1048640,
+ 0x03f4, 1048516,
+ 0x03f7, 1048577,
+ 0x03f9, 1048569,
+ 0x03fa, 1048577,
+ 0x04c0, 1048591,
+ 0x04c1, 1048577,
+ 0x1fbc, 1048567,
+ 0x1fcc, 1048567,
+ 0x1fec, 1048569,
+ 0x1ffc, 1048567,
+ 0x2126, 1041059,
+ 0x212a, 1040193,
+ 0x212b, 1040314,
+ 0x2132, 1048604,
+ 0x2183, 1048577,
+ 0x2c60, 1048577,
+ 0x2c62, 1037833,
+ 0x2c63, 1044762,
+ 0x2c64, 1037849,
+ 0x2c75, 1048577,
+};
+
+Rune
+tolowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __totitler[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __totitlep[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01cc, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f3, 0x01f5, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0513, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1ef9, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+};
+
+static Rune __totitles[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c4, 1048577,
+ 0x01c6, 1048575,
+ 0x01c7, 1048577,
+ 0x01c9, 1048575,
+ 0x01ca, 1048577,
+ 0x01dd, 1048497,
+ 0x01f1, 1048577,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c76, 1048575,
+};
+
+Rune
+totitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __totitler, nelem(__totitler)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitles, nelem(__totitles)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
diff --git a/src/lib9/utf/runetypebody-5.2.0.c b/src/lib9/utf/runetypebody-5.2.0.c
new file mode 100644
index 000000000..4ff66b9d9
--- /dev/null
+++ b/src/lib9/utf/runetypebody-5.2.0.c
@@ -0,0 +1,1541 @@
+/* generated automatically by mkrunetype.c from UnicodeData-5.2.0.txt */
+
+static Rune __isspacer[] = {
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+ 0xfeff, 0xfeff,
+};
+
+int
+isspacerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isdigitr[] = {
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19da,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0xa620, 0xa629,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff,
+};
+
+int
+isdigitrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isalphar[] = {
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0388, 0x038a,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x0800, 0x0815,
+ 0x0904, 0x0939,
+ 0x0958, 0x0961,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b6, 0x09b9,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e87, 0x0e88,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x102a,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2090, 0x2094,
+ 0x210a, 0x2113,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6e5,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa80, 0xaaaf,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+};
+
+static Rune __isalphas[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x02ec,
+ 0x02ee,
+ 0x0386,
+ 0x038c,
+ 0x0559,
+ 0x06d5,
+ 0x06ff,
+ 0x0710,
+ 0x07b1,
+ 0x07fa,
+ 0x081a,
+ 0x0824,
+ 0x0828,
+ 0x093d,
+ 0x0950,
+ 0x09b2,
+ 0x09bd,
+ 0x09ce,
+ 0x0a5e,
+ 0x0abd,
+ 0x0ad0,
+ 0x0b3d,
+ 0x0b71,
+ 0x0b83,
+ 0x0b9c,
+ 0x0bd0,
+ 0x0c3d,
+ 0x0cbd,
+ 0x0cde,
+ 0x0d3d,
+ 0x0dbd,
+ 0x0e84,
+ 0x0e8a,
+ 0x0e8d,
+ 0x0ea5,
+ 0x0ea7,
+ 0x0ebd,
+ 0x0ec6,
+ 0x0f00,
+ 0x103f,
+ 0x1061,
+ 0x108e,
+ 0x10fc,
+ 0x1258,
+ 0x12c0,
+ 0x17d7,
+ 0x17dc,
+ 0x18aa,
+ 0x1aa7,
+ 0x1f59,
+ 0x1f5b,
+ 0x1f5d,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2124,
+ 0x2126,
+ 0x2128,
+ 0x214e,
+ 0x2d6f,
+ 0x2e2f,
+ 0xa8fb,
+ 0xa9cf,
+ 0xaa7a,
+ 0xaab1,
+ 0xaac0,
+ 0xaac2,
+ 0xfb1d,
+ 0xfb3e,
+ 0x10808,
+ 0x1083c,
+ 0x10a00,
+ 0x1d4a2,
+ 0x1d4bb,
+ 0x1d546,
+};
+
+int
+isalpharune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isalphas, nelem(__isalphas), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __isupperr[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0x2c6d, 0x2c70,
+ 0x2c7e, 0x2c80,
+ 0xa77d, 0xa77e,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+};
+
+static Rune __isupperp[] = {
+ 0x0100, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cd, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x0370, 0x0372,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0524,
+ 0x1e00, 0x1e94,
+ 0x1e9e, 0x1efe,
+ 0x1f59, 0x1f5f,
+ 0x2124, 0x2128,
+ 0x2c67, 0x2c6b,
+ 0x2c82, 0x2ce2,
+ 0x2ceb, 0x2ced,
+ 0xa640, 0xa65e,
+ 0xa662, 0xa66c,
+ 0xa680, 0xa696,
+ 0xa722, 0xa72e,
+ 0xa732, 0xa76e,
+ 0xa779, 0xa77b,
+ 0xa780, 0xa786,
+};
+
+static Rune __isuppers[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c4,
+ 0x01c7,
+ 0x01ca,
+ 0x01f1,
+ 0x01f4,
+ 0x0241,
+ 0x0376,
+ 0x0386,
+ 0x038c,
+ 0x03cf,
+ 0x03f4,
+ 0x03f7,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2145,
+ 0x2183,
+ 0x2c60,
+ 0x2c72,
+ 0x2c75,
+ 0xa78b,
+ 0x1d49c,
+ 0x1d4a2,
+ 0x1d546,
+ 0x1d7ca,
+};
+
+int
+isupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __isuppers, nelem(__isuppers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __islowerr[] = {
+ 0x0061, 0x007a,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0137, 0x0138,
+ 0x0148, 0x0149,
+ 0x017e, 0x0180,
+ 0x018c, 0x018d,
+ 0x0199, 0x019b,
+ 0x01aa, 0x01ab,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01dc, 0x01dd,
+ 0x01ef, 0x01f0,
+ 0x0233, 0x0239,
+ 0x023f, 0x0240,
+ 0x024f, 0x0293,
+ 0x0295, 0x02af,
+ 0x037b, 0x037d,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03ef, 0x03f3,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x04ce, 0x04cf,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e95, 0x1e9d,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210e, 0x210f,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2170, 0x217f,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c65, 0x2c66,
+ 0x2c73, 0x2c74,
+ 0x2c76, 0x2c7c,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xa72f, 0xa731,
+ 0xa771, 0xa778,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+};
+
+static Rune __islowerp[] = {
+ 0x0101, 0x0135,
+ 0x013a, 0x0146,
+ 0x014b, 0x0177,
+ 0x017a, 0x017c,
+ 0x0183, 0x0185,
+ 0x01a1, 0x01a5,
+ 0x01b4, 0x01b6,
+ 0x01cc, 0x01da,
+ 0x01df, 0x01ed,
+ 0x01f3, 0x01f5,
+ 0x01f9, 0x0231,
+ 0x0247, 0x024d,
+ 0x0371, 0x0373,
+ 0x03d9, 0x03ed,
+ 0x0461, 0x0481,
+ 0x048b, 0x04bf,
+ 0x04c2, 0x04cc,
+ 0x04d1, 0x0525,
+ 0x1e01, 0x1e93,
+ 0x1e9f, 0x1efd,
+ 0x2c68, 0x2c6c,
+ 0x2c81, 0x2ce1,
+ 0x2cec, 0x2cee,
+ 0xa641, 0xa65f,
+ 0xa663, 0xa66d,
+ 0xa681, 0xa697,
+ 0xa723, 0xa72d,
+ 0xa733, 0xa76f,
+ 0xa77a, 0xa77c,
+ 0xa77f, 0xa787,
+};
+
+static Rune __islowers[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x0188,
+ 0x0192,
+ 0x0195,
+ 0x019e,
+ 0x01a8,
+ 0x01ad,
+ 0x01b0,
+ 0x01c6,
+ 0x01c9,
+ 0x023c,
+ 0x0242,
+ 0x0377,
+ 0x0390,
+ 0x03f5,
+ 0x03f8,
+ 0x1fbe,
+ 0x210a,
+ 0x2113,
+ 0x212f,
+ 0x2134,
+ 0x2139,
+ 0x214e,
+ 0x2184,
+ 0x2c61,
+ 0x2c71,
+ 0xa78c,
+ 0x1d4bb,
+ 0x1d7cb,
+};
+
+int
+islowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __islowers, nelem(__islowers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __istitler[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0x2c6d, 0x2c70,
+ 0x2c7e, 0x2c80,
+ 0xa77d, 0xa77e,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+};
+
+static Rune __istitlep[] = {
+ 0x0100, 0x012e,
+ 0x0132, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cb, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01f2, 0x01f4,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x0370, 0x0372,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0524,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1efe,
+ 0x1f59, 0x1f5f,
+ 0x2c67, 0x2c6b,
+ 0x2c82, 0x2ce2,
+ 0x2ceb, 0x2ced,
+ 0xa640, 0xa65e,
+ 0xa662, 0xa66c,
+ 0xa680, 0xa696,
+ 0xa722, 0xa72e,
+ 0xa732, 0xa76e,
+ 0xa779, 0xa77b,
+ 0xa780, 0xa786,
+};
+
+static Rune __istitles[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c5,
+ 0x01c8,
+ 0x0241,
+ 0x0376,
+ 0x0386,
+ 0x038c,
+ 0x03cf,
+ 0x03f7,
+ 0x2132,
+ 0x2183,
+ 0x2c60,
+ 0x2c72,
+ 0x2c75,
+ 0xa78b,
+};
+
+int
+istitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __istitler, nelem(__istitler)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __istitles, nelem(__istitles), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __toupperr[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x023f, 0x0240, 1059391,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __toupperp[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01ce, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x0371, 0x0373, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0525, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1eff, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+ 0x2cec, 0x2cee, 1048575,
+ 0xa641, 0xa65f, 1048575,
+ 0xa663, 0xa66d, 1048575,
+ 0xa681, 0xa697, 1048575,
+ 0xa723, 0xa72f, 1048575,
+ 0xa733, 0xa76f, 1048575,
+ 0xa77a, 0xa77c, 1048575,
+ 0xa77f, 0xa787, 1048575,
+};
+
+static Rune __touppers[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c5, 1048575,
+ 0x01c6, 1048574,
+ 0x01c8, 1048575,
+ 0x01c9, 1048574,
+ 0x01cb, 1048575,
+ 0x01cc, 1048574,
+ 0x01dd, 1048497,
+ 0x01f2, 1048575,
+ 0x01f3, 1048574,
+ 0x01f5, 1048575,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0250, 1059359,
+ 0x0251, 1059356,
+ 0x0252, 1059358,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0271, 1059325,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x0377, 1048575,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03d7, 1048568,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d79, 1083908,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c73, 1048575,
+ 0x2c76, 1048575,
+ 0xa78c, 1048575,
+};
+
+Rune
+toupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __touppers, nelem(__touppers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __tolowerr[] = {
+ 0x0041, 0x005a, 1048608,
+ 0x00c0, 0x00d6, 1048608,
+ 0x00d8, 0x00de, 1048608,
+ 0x0189, 0x018a, 1048781,
+ 0x01b1, 0x01b2, 1048793,
+ 0x0388, 0x038a, 1048613,
+ 0x038e, 0x038f, 1048639,
+ 0x0391, 0x03a1, 1048608,
+ 0x03a3, 0x03ab, 1048608,
+ 0x03fd, 0x03ff, 1048446,
+ 0x0400, 0x040f, 1048656,
+ 0x0410, 0x042f, 1048608,
+ 0x0531, 0x0556, 1048624,
+ 0x10a0, 0x10c5, 1055840,
+ 0x1f08, 0x1f0f, 1048568,
+ 0x1f18, 0x1f1d, 1048568,
+ 0x1f28, 0x1f2f, 1048568,
+ 0x1f38, 0x1f3f, 1048568,
+ 0x1f48, 0x1f4d, 1048568,
+ 0x1f68, 0x1f6f, 1048568,
+ 0x1f88, 0x1f8f, 1048568,
+ 0x1f98, 0x1f9f, 1048568,
+ 0x1fa8, 0x1faf, 1048568,
+ 0x1fb8, 0x1fb9, 1048568,
+ 0x1fba, 0x1fbb, 1048502,
+ 0x1fc8, 0x1fcb, 1048490,
+ 0x1fd8, 0x1fd9, 1048568,
+ 0x1fda, 0x1fdb, 1048476,
+ 0x1fe8, 0x1fe9, 1048568,
+ 0x1fea, 0x1feb, 1048464,
+ 0x1ff8, 0x1ff9, 1048448,
+ 0x1ffa, 0x1ffb, 1048450,
+ 0x2160, 0x216f, 1048592,
+ 0x24b6, 0x24cf, 1048602,
+ 0x2c00, 0x2c2e, 1048624,
+ 0x2c7e, 0x2c7f, 1037761,
+ 0xff21, 0xff3a, 1048608,
+ 0x10400, 0x10427, 1048616,
+};
+
+static Rune __tolowerp[] = {
+ 0x0100, 0x012e, 1048577,
+ 0x0132, 0x0136, 1048577,
+ 0x0139, 0x0147, 1048577,
+ 0x014a, 0x0176, 1048577,
+ 0x017b, 0x017d, 1048577,
+ 0x01a2, 0x01a4, 1048577,
+ 0x01b3, 0x01b5, 1048577,
+ 0x01cd, 0x01db, 1048577,
+ 0x01de, 0x01ee, 1048577,
+ 0x01f8, 0x021e, 1048577,
+ 0x0222, 0x0232, 1048577,
+ 0x0248, 0x024e, 1048577,
+ 0x0370, 0x0372, 1048577,
+ 0x03d8, 0x03ee, 1048577,
+ 0x0460, 0x0480, 1048577,
+ 0x048a, 0x04be, 1048577,
+ 0x04c3, 0x04cd, 1048577,
+ 0x04d0, 0x0524, 1048577,
+ 0x1e00, 0x1e94, 1048577,
+ 0x1ea0, 0x1efe, 1048577,
+ 0x1f59, 0x1f5f, 1048568,
+ 0x2c67, 0x2c6b, 1048577,
+ 0x2c80, 0x2ce2, 1048577,
+ 0x2ceb, 0x2ced, 1048577,
+ 0xa640, 0xa65e, 1048577,
+ 0xa662, 0xa66c, 1048577,
+ 0xa680, 0xa696, 1048577,
+ 0xa722, 0xa72e, 1048577,
+ 0xa732, 0xa76e, 1048577,
+ 0xa779, 0xa77b, 1048577,
+ 0xa780, 0xa786, 1048577,
+};
+
+static Rune __tolowers[] = {
+ 0x0130, 1048377,
+ 0x0178, 1048455,
+ 0x0179, 1048577,
+ 0x0181, 1048786,
+ 0x0182, 1048577,
+ 0x0184, 1048577,
+ 0x0186, 1048782,
+ 0x0187, 1048577,
+ 0x018b, 1048577,
+ 0x018e, 1048655,
+ 0x018f, 1048778,
+ 0x0190, 1048779,
+ 0x0191, 1048577,
+ 0x0193, 1048781,
+ 0x0194, 1048783,
+ 0x0196, 1048787,
+ 0x0197, 1048785,
+ 0x0198, 1048577,
+ 0x019c, 1048787,
+ 0x019d, 1048789,
+ 0x019f, 1048790,
+ 0x01a0, 1048577,
+ 0x01a6, 1048794,
+ 0x01a7, 1048577,
+ 0x01a9, 1048794,
+ 0x01ac, 1048577,
+ 0x01ae, 1048794,
+ 0x01af, 1048577,
+ 0x01b7, 1048795,
+ 0x01b8, 1048577,
+ 0x01bc, 1048577,
+ 0x01c4, 1048578,
+ 0x01c5, 1048577,
+ 0x01c7, 1048578,
+ 0x01c8, 1048577,
+ 0x01ca, 1048578,
+ 0x01cb, 1048577,
+ 0x01f1, 1048578,
+ 0x01f2, 1048577,
+ 0x01f4, 1048577,
+ 0x01f6, 1048479,
+ 0x01f7, 1048520,
+ 0x0220, 1048446,
+ 0x023a, 1059371,
+ 0x023b, 1048577,
+ 0x023d, 1048413,
+ 0x023e, 1059368,
+ 0x0241, 1048577,
+ 0x0243, 1048381,
+ 0x0244, 1048645,
+ 0x0245, 1048647,
+ 0x0246, 1048577,
+ 0x0376, 1048577,
+ 0x0386, 1048614,
+ 0x038c, 1048640,
+ 0x03cf, 1048584,
+ 0x03f4, 1048516,
+ 0x03f7, 1048577,
+ 0x03f9, 1048569,
+ 0x03fa, 1048577,
+ 0x04c0, 1048591,
+ 0x04c1, 1048577,
+ 0x1e9e, 1040961,
+ 0x1fbc, 1048567,
+ 0x1fcc, 1048567,
+ 0x1fec, 1048569,
+ 0x1ffc, 1048567,
+ 0x2126, 1041059,
+ 0x212a, 1040193,
+ 0x212b, 1040314,
+ 0x2132, 1048604,
+ 0x2183, 1048577,
+ 0x2c60, 1048577,
+ 0x2c62, 1037833,
+ 0x2c63, 1044762,
+ 0x2c64, 1037849,
+ 0x2c6d, 1037796,
+ 0x2c6e, 1037827,
+ 0x2c6f, 1037793,
+ 0x2c70, 1037794,
+ 0x2c72, 1048577,
+ 0x2c75, 1048577,
+ 0xa77d, 1013244,
+ 0xa77e, 1048577,
+ 0xa78b, 1048577,
+};
+
+Rune
+tolowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __totitler[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x023f, 0x0240, 1059391,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __totitlep[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01cc, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f3, 0x01f5, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x0371, 0x0373, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0525, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1eff, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+ 0x2cec, 0x2cee, 1048575,
+ 0xa641, 0xa65f, 1048575,
+ 0xa663, 0xa66d, 1048575,
+ 0xa681, 0xa697, 1048575,
+ 0xa723, 0xa72f, 1048575,
+ 0xa733, 0xa76f, 1048575,
+ 0xa77a, 0xa77c, 1048575,
+ 0xa77f, 0xa787, 1048575,
+};
+
+static Rune __totitles[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c4, 1048577,
+ 0x01c6, 1048575,
+ 0x01c7, 1048577,
+ 0x01c9, 1048575,
+ 0x01ca, 1048577,
+ 0x01dd, 1048497,
+ 0x01f1, 1048577,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0250, 1059359,
+ 0x0251, 1059356,
+ 0x0252, 1059358,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0271, 1059325,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x0377, 1048575,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03d7, 1048568,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d79, 1083908,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c73, 1048575,
+ 0x2c76, 1048575,
+ 0xa78c, 1048575,
+};
+
+Rune
+totitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __totitler, nelem(__totitler)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitles, nelem(__totitles)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
diff --git a/src/lib9/utf/runetypebody-6.0.0.c b/src/lib9/utf/runetypebody-6.0.0.c
new file mode 100644
index 000000000..47c0faf73
--- /dev/null
+++ b/src/lib9/utf/runetypebody-6.0.0.c
@@ -0,0 +1,1565 @@
+/* generated automatically by mkrunetype.c from UnicodeData-6.0.0.txt */
+
+static Rune __isspacer[] = {
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+ 0xfeff, 0xfeff,
+};
+
+int
+isspacerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isdigitr[] = {
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0xa620, 0xa629,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x11066, 0x1106f,
+ 0x1d7ce, 0x1d7ff,
+};
+
+int
+isdigitrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static Rune __isalphar[] = {
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0388, 0x038a,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0527,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0620, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x0800, 0x0815,
+ 0x0840, 0x0858,
+ 0x0904, 0x0939,
+ 0x0958, 0x0961,
+ 0x0971, 0x0977,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b6, 0x09b9,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0ce0, 0x0ce1,
+ 0x0cf1, 0x0cf2,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d3a,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e87, 0x0e88,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8c,
+ 0x1000, 0x102a,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1bc0, 0x1be5,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2090, 0x209c,
+ 0x210a, 0x2113,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31ba,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6e5,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa791,
+ 0xa7a0, 0xa7a9,
+ 0xa7fa, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa80, 0xaaaf,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaadb, 0xaadd,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11003, 0x11037,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x13000, 0x1342e,
+ 0x16800, 0x16a38,
+ 0x1b000, 0x1b001,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d,
+};
+
+static Rune __isalphas[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x02ec,
+ 0x02ee,
+ 0x0386,
+ 0x038c,
+ 0x0559,
+ 0x06d5,
+ 0x06ff,
+ 0x0710,
+ 0x07b1,
+ 0x07fa,
+ 0x081a,
+ 0x0824,
+ 0x0828,
+ 0x093d,
+ 0x0950,
+ 0x09b2,
+ 0x09bd,
+ 0x09ce,
+ 0x0a5e,
+ 0x0abd,
+ 0x0ad0,
+ 0x0b3d,
+ 0x0b71,
+ 0x0b83,
+ 0x0b9c,
+ 0x0bd0,
+ 0x0c3d,
+ 0x0cbd,
+ 0x0cde,
+ 0x0d3d,
+ 0x0d4e,
+ 0x0dbd,
+ 0x0e84,
+ 0x0e8a,
+ 0x0e8d,
+ 0x0ea5,
+ 0x0ea7,
+ 0x0ebd,
+ 0x0ec6,
+ 0x0f00,
+ 0x103f,
+ 0x1061,
+ 0x108e,
+ 0x10fc,
+ 0x1258,
+ 0x12c0,
+ 0x17d7,
+ 0x17dc,
+ 0x18aa,
+ 0x1aa7,
+ 0x1f59,
+ 0x1f5b,
+ 0x1f5d,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2124,
+ 0x2126,
+ 0x2128,
+ 0x214e,
+ 0x2d6f,
+ 0x2e2f,
+ 0xa8fb,
+ 0xa9cf,
+ 0xaa7a,
+ 0xaab1,
+ 0xaac0,
+ 0xaac2,
+ 0xfb1d,
+ 0xfb3e,
+ 0x10808,
+ 0x1083c,
+ 0x10a00,
+ 0x1d4a2,
+ 0x1d4bb,
+ 0x1d546,
+};
+
+int
+isalpharune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isalphas, nelem(__isalphas), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __isupperr[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0x2c6d, 0x2c70,
+ 0x2c7e, 0x2c80,
+ 0xa77d, 0xa77e,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+};
+
+static Rune __isupperp[] = {
+ 0x0100, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cd, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x0370, 0x0372,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0526,
+ 0x1e00, 0x1e94,
+ 0x1e9e, 0x1efe,
+ 0x1f59, 0x1f5f,
+ 0x2124, 0x2128,
+ 0x2c67, 0x2c6b,
+ 0x2c82, 0x2ce2,
+ 0x2ceb, 0x2ced,
+ 0xa640, 0xa66c,
+ 0xa680, 0xa696,
+ 0xa722, 0xa72e,
+ 0xa732, 0xa76e,
+ 0xa779, 0xa77b,
+ 0xa780, 0xa786,
+ 0xa78b, 0xa78d,
+ 0xa7a0, 0xa7a8,
+};
+
+static Rune __isuppers[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c4,
+ 0x01c7,
+ 0x01ca,
+ 0x01f1,
+ 0x01f4,
+ 0x0241,
+ 0x0376,
+ 0x0386,
+ 0x038c,
+ 0x03cf,
+ 0x03f4,
+ 0x03f7,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2145,
+ 0x2183,
+ 0x2c60,
+ 0x2c72,
+ 0x2c75,
+ 0xa790,
+ 0x1d49c,
+ 0x1d4a2,
+ 0x1d546,
+ 0x1d7ca,
+};
+
+int
+isupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __isuppers, nelem(__isuppers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __islowerr[] = {
+ 0x0061, 0x007a,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0137, 0x0138,
+ 0x0148, 0x0149,
+ 0x017e, 0x0180,
+ 0x018c, 0x018d,
+ 0x0199, 0x019b,
+ 0x01aa, 0x01ab,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01dc, 0x01dd,
+ 0x01ef, 0x01f0,
+ 0x0233, 0x0239,
+ 0x023f, 0x0240,
+ 0x024f, 0x0293,
+ 0x0295, 0x02af,
+ 0x037b, 0x037d,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03ef, 0x03f3,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x04ce, 0x04cf,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e95, 0x1e9d,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210e, 0x210f,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2170, 0x217f,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c65, 0x2c66,
+ 0x2c73, 0x2c74,
+ 0x2c76, 0x2c7c,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xa72f, 0xa731,
+ 0xa771, 0xa778,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+};
+
+static Rune __islowerp[] = {
+ 0x0101, 0x0135,
+ 0x013a, 0x0146,
+ 0x014b, 0x0177,
+ 0x017a, 0x017c,
+ 0x0183, 0x0185,
+ 0x01a1, 0x01a5,
+ 0x01b4, 0x01b6,
+ 0x01cc, 0x01da,
+ 0x01df, 0x01ed,
+ 0x01f3, 0x01f5,
+ 0x01f9, 0x0231,
+ 0x0247, 0x024d,
+ 0x0371, 0x0373,
+ 0x03d9, 0x03ed,
+ 0x0461, 0x0481,
+ 0x048b, 0x04bf,
+ 0x04c2, 0x04cc,
+ 0x04d1, 0x0527,
+ 0x1e01, 0x1e93,
+ 0x1e9f, 0x1efd,
+ 0x2c68, 0x2c6c,
+ 0x2c81, 0x2ce1,
+ 0x2cec, 0x2cee,
+ 0xa641, 0xa66d,
+ 0xa681, 0xa697,
+ 0xa723, 0xa72d,
+ 0xa733, 0xa76f,
+ 0xa77a, 0xa77c,
+ 0xa77f, 0xa787,
+ 0xa78c, 0xa78e,
+ 0xa7a1, 0xa7a9,
+};
+
+static Rune __islowers[] = {
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x0188,
+ 0x0192,
+ 0x0195,
+ 0x019e,
+ 0x01a8,
+ 0x01ad,
+ 0x01b0,
+ 0x01c6,
+ 0x01c9,
+ 0x023c,
+ 0x0242,
+ 0x0377,
+ 0x0390,
+ 0x03f5,
+ 0x03f8,
+ 0x1fbe,
+ 0x210a,
+ 0x2113,
+ 0x212f,
+ 0x2134,
+ 0x2139,
+ 0x214e,
+ 0x2184,
+ 0x2c61,
+ 0x2c71,
+ 0xa791,
+ 0xa7fa,
+ 0x1d4bb,
+ 0x1d7cb,
+};
+
+int
+islowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __islowers, nelem(__islowers), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __istitler[] = {
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0x2c6d, 0x2c70,
+ 0x2c7e, 0x2c80,
+ 0xa77d, 0xa77e,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+};
+
+static Rune __istitlep[] = {
+ 0x0100, 0x012e,
+ 0x0132, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cb, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01f2, 0x01f4,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x0370, 0x0372,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0526,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1efe,
+ 0x1f59, 0x1f5f,
+ 0x2c67, 0x2c6b,
+ 0x2c82, 0x2ce2,
+ 0x2ceb, 0x2ced,
+ 0xa640, 0xa66c,
+ 0xa680, 0xa696,
+ 0xa722, 0xa72e,
+ 0xa732, 0xa76e,
+ 0xa779, 0xa77b,
+ 0xa780, 0xa786,
+ 0xa78b, 0xa78d,
+ 0xa7a0, 0xa7a8,
+};
+
+static Rune __istitles[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c5,
+ 0x01c8,
+ 0x0241,
+ 0x0376,
+ 0x0386,
+ 0x038c,
+ 0x03cf,
+ 0x03f7,
+ 0x2132,
+ 0x2183,
+ 0x2c60,
+ 0x2c72,
+ 0x2c75,
+ 0xa790,
+};
+
+int
+istitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __istitler, nelem(__istitler)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return 1;
+ p = rbsearch(c, __istitles, nelem(__istitles), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static Rune __toupperr[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x023f, 0x0240, 1059391,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __toupperp[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01ce, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x0371, 0x0373, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0527, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1eff, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+ 0x2cec, 0x2cee, 1048575,
+ 0xa641, 0xa66d, 1048575,
+ 0xa681, 0xa697, 1048575,
+ 0xa723, 0xa72f, 1048575,
+ 0xa733, 0xa76f, 1048575,
+ 0xa77a, 0xa77c, 1048575,
+ 0xa77f, 0xa787, 1048575,
+ 0xa7a1, 0xa7a9, 1048575,
+};
+
+static Rune __touppers[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c5, 1048575,
+ 0x01c6, 1048574,
+ 0x01c8, 1048575,
+ 0x01c9, 1048574,
+ 0x01cb, 1048575,
+ 0x01cc, 1048574,
+ 0x01dd, 1048497,
+ 0x01f2, 1048575,
+ 0x01f3, 1048574,
+ 0x01f5, 1048575,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0250, 1059359,
+ 0x0251, 1059356,
+ 0x0252, 1059358,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0265, 1090856,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0271, 1059325,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x0377, 1048575,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03d7, 1048568,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d79, 1083908,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c73, 1048575,
+ 0x2c76, 1048575,
+ 0xa78c, 1048575,
+ 0xa791, 1048575,
+};
+
+Rune
+toupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __touppers, nelem(__touppers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __tolowerr[] = {
+ 0x0041, 0x005a, 1048608,
+ 0x00c0, 0x00d6, 1048608,
+ 0x00d8, 0x00de, 1048608,
+ 0x0189, 0x018a, 1048781,
+ 0x01b1, 0x01b2, 1048793,
+ 0x0388, 0x038a, 1048613,
+ 0x038e, 0x038f, 1048639,
+ 0x0391, 0x03a1, 1048608,
+ 0x03a3, 0x03ab, 1048608,
+ 0x03fd, 0x03ff, 1048446,
+ 0x0400, 0x040f, 1048656,
+ 0x0410, 0x042f, 1048608,
+ 0x0531, 0x0556, 1048624,
+ 0x10a0, 0x10c5, 1055840,
+ 0x1f08, 0x1f0f, 1048568,
+ 0x1f18, 0x1f1d, 1048568,
+ 0x1f28, 0x1f2f, 1048568,
+ 0x1f38, 0x1f3f, 1048568,
+ 0x1f48, 0x1f4d, 1048568,
+ 0x1f68, 0x1f6f, 1048568,
+ 0x1f88, 0x1f8f, 1048568,
+ 0x1f98, 0x1f9f, 1048568,
+ 0x1fa8, 0x1faf, 1048568,
+ 0x1fb8, 0x1fb9, 1048568,
+ 0x1fba, 0x1fbb, 1048502,
+ 0x1fc8, 0x1fcb, 1048490,
+ 0x1fd8, 0x1fd9, 1048568,
+ 0x1fda, 0x1fdb, 1048476,
+ 0x1fe8, 0x1fe9, 1048568,
+ 0x1fea, 0x1feb, 1048464,
+ 0x1ff8, 0x1ff9, 1048448,
+ 0x1ffa, 0x1ffb, 1048450,
+ 0x2160, 0x216f, 1048592,
+ 0x24b6, 0x24cf, 1048602,
+ 0x2c00, 0x2c2e, 1048624,
+ 0x2c7e, 0x2c7f, 1037761,
+ 0xff21, 0xff3a, 1048608,
+ 0x10400, 0x10427, 1048616,
+};
+
+static Rune __tolowerp[] = {
+ 0x0100, 0x012e, 1048577,
+ 0x0132, 0x0136, 1048577,
+ 0x0139, 0x0147, 1048577,
+ 0x014a, 0x0176, 1048577,
+ 0x017b, 0x017d, 1048577,
+ 0x01a2, 0x01a4, 1048577,
+ 0x01b3, 0x01b5, 1048577,
+ 0x01cd, 0x01db, 1048577,
+ 0x01de, 0x01ee, 1048577,
+ 0x01f8, 0x021e, 1048577,
+ 0x0222, 0x0232, 1048577,
+ 0x0248, 0x024e, 1048577,
+ 0x0370, 0x0372, 1048577,
+ 0x03d8, 0x03ee, 1048577,
+ 0x0460, 0x0480, 1048577,
+ 0x048a, 0x04be, 1048577,
+ 0x04c3, 0x04cd, 1048577,
+ 0x04d0, 0x0526, 1048577,
+ 0x1e00, 0x1e94, 1048577,
+ 0x1ea0, 0x1efe, 1048577,
+ 0x1f59, 0x1f5f, 1048568,
+ 0x2c67, 0x2c6b, 1048577,
+ 0x2c80, 0x2ce2, 1048577,
+ 0x2ceb, 0x2ced, 1048577,
+ 0xa640, 0xa66c, 1048577,
+ 0xa680, 0xa696, 1048577,
+ 0xa722, 0xa72e, 1048577,
+ 0xa732, 0xa76e, 1048577,
+ 0xa779, 0xa77b, 1048577,
+ 0xa780, 0xa786, 1048577,
+ 0xa7a0, 0xa7a8, 1048577,
+};
+
+static Rune __tolowers[] = {
+ 0x0130, 1048377,
+ 0x0178, 1048455,
+ 0x0179, 1048577,
+ 0x0181, 1048786,
+ 0x0182, 1048577,
+ 0x0184, 1048577,
+ 0x0186, 1048782,
+ 0x0187, 1048577,
+ 0x018b, 1048577,
+ 0x018e, 1048655,
+ 0x018f, 1048778,
+ 0x0190, 1048779,
+ 0x0191, 1048577,
+ 0x0193, 1048781,
+ 0x0194, 1048783,
+ 0x0196, 1048787,
+ 0x0197, 1048785,
+ 0x0198, 1048577,
+ 0x019c, 1048787,
+ 0x019d, 1048789,
+ 0x019f, 1048790,
+ 0x01a0, 1048577,
+ 0x01a6, 1048794,
+ 0x01a7, 1048577,
+ 0x01a9, 1048794,
+ 0x01ac, 1048577,
+ 0x01ae, 1048794,
+ 0x01af, 1048577,
+ 0x01b7, 1048795,
+ 0x01b8, 1048577,
+ 0x01bc, 1048577,
+ 0x01c4, 1048578,
+ 0x01c5, 1048577,
+ 0x01c7, 1048578,
+ 0x01c8, 1048577,
+ 0x01ca, 1048578,
+ 0x01cb, 1048577,
+ 0x01f1, 1048578,
+ 0x01f2, 1048577,
+ 0x01f4, 1048577,
+ 0x01f6, 1048479,
+ 0x01f7, 1048520,
+ 0x0220, 1048446,
+ 0x023a, 1059371,
+ 0x023b, 1048577,
+ 0x023d, 1048413,
+ 0x023e, 1059368,
+ 0x0241, 1048577,
+ 0x0243, 1048381,
+ 0x0244, 1048645,
+ 0x0245, 1048647,
+ 0x0246, 1048577,
+ 0x0376, 1048577,
+ 0x0386, 1048614,
+ 0x038c, 1048640,
+ 0x03cf, 1048584,
+ 0x03f4, 1048516,
+ 0x03f7, 1048577,
+ 0x03f9, 1048569,
+ 0x03fa, 1048577,
+ 0x04c0, 1048591,
+ 0x04c1, 1048577,
+ 0x1e9e, 1040961,
+ 0x1fbc, 1048567,
+ 0x1fcc, 1048567,
+ 0x1fec, 1048569,
+ 0x1ffc, 1048567,
+ 0x2126, 1041059,
+ 0x212a, 1040193,
+ 0x212b, 1040314,
+ 0x2132, 1048604,
+ 0x2183, 1048577,
+ 0x2c60, 1048577,
+ 0x2c62, 1037833,
+ 0x2c63, 1044762,
+ 0x2c64, 1037849,
+ 0x2c6d, 1037796,
+ 0x2c6e, 1037827,
+ 0x2c6f, 1037793,
+ 0x2c70, 1037794,
+ 0x2c72, 1048577,
+ 0x2c75, 1048577,
+ 0xa77d, 1013244,
+ 0xa77e, 1048577,
+ 0xa78b, 1048577,
+ 0xa78d, 1006296,
+ 0xa790, 1048577,
+};
+
+Rune
+tolowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
+static Rune __totitler[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x023f, 0x0240, 1059391,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+
+static Rune __totitlep[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01cc, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f3, 0x01f5, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x0371, 0x0373, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0527, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1eff, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+ 0x2cec, 0x2cee, 1048575,
+ 0xa641, 0xa66d, 1048575,
+ 0xa681, 0xa697, 1048575,
+ 0xa723, 0xa72f, 1048575,
+ 0xa733, 0xa76f, 1048575,
+ 0xa77a, 0xa77c, 1048575,
+ 0xa77f, 0xa787, 1048575,
+ 0xa7a1, 0xa7a9, 1048575,
+};
+
+static Rune __totitles[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c4, 1048577,
+ 0x01c6, 1048575,
+ 0x01c7, 1048577,
+ 0x01c9, 1048575,
+ 0x01ca, 1048577,
+ 0x01dd, 1048497,
+ 0x01f1, 1048577,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0250, 1059359,
+ 0x0251, 1059356,
+ 0x0252, 1059358,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0265, 1090856,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0271, 1059325,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x0377, 1048575,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03d7, 1048568,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d79, 1083908,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c73, 1048575,
+ 0x2c76, 1048575,
+ 0xa78c, 1048575,
+ 0xa791, 1048575,
+};
+
+Rune
+totitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __totitler, nelem(__totitler)/3, 3);
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3);
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitles, nelem(__totitles)/2, 2);
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
diff --git a/src/lib9/utf/utf.h b/src/lib9/utf/utf.h
new file mode 100644
index 000000000..8a79828bc
--- /dev/null
+++ b/src/lib9/utf/utf.h
@@ -0,0 +1,242 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 1998-2002 by Lucent Technologies.
+ * Portions Copyright (c) 2009 The Go Authors. All rights reserved.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+#ifndef _UTFH_
+#define _UTFH_ 1
+
+#include <stdint.h>
+
+typedef unsigned int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/
+
+enum
+{
+ UTFmax = 4, /* maximum bytes per rune */
+ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+ Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF, /* maximum rune value */
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * rune routines
+ */
+
+/*
+ * These routines were written by Rob Pike and Ken Thompson
+ * and first appeared in Plan 9.
+ * SEE ALSO
+ * utf (7)
+ * tcs (1)
+*/
+
+// runetochar copies (encodes) one rune, pointed to by r, to at most
+// UTFmax bytes starting at s and returns the number of bytes generated.
+
+int runetochar(char* s, const Rune* r);
+
+
+// chartorune copies (decodes) at most UTFmax bytes starting at s to
+// one rune, pointed to by r, and returns the number of bytes consumed.
+// If the input is not exactly in UTF format, chartorune will set *r
+// to Runeerror and return 1.
+//
+// Note: There is no special case for a "null-terminated" string. A
+// string whose first byte has the value 0 is the UTF8 encoding of the
+// Unicode value 0 (i.e., ASCII NULL). A byte value of 0 is illegal
+// anywhere else in a UTF sequence.
+
+int chartorune(Rune* r, const char* s);
+
+
+// charntorune is like chartorune, except that it will access at most
+// n bytes of s. If the UTF sequence is incomplete within n bytes,
+// charntorune will set *r to Runeerror and return 0. If it is complete
+// but not in UTF format, it will set *r to Runeerror and return 1.
+//
+// Added 2004-09-24 by Wei-Hwa Huang
+
+int charntorune(Rune* r, const char* s, int n);
+
+// isvalidcharntorune(str, n, r, consumed)
+// is a convenience function that calls "*consumed = charntorune(r, str, n)"
+// and returns an int (logically boolean) indicating whether the first
+// n bytes of str was a valid and complete UTF sequence.
+
+int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
+
+// runelen returns the number of bytes required to convert r into UTF.
+
+int runelen(Rune r);
+
+
+// runenlen returns the number of bytes required to convert the n
+// runes pointed to by r into UTF.
+
+int runenlen(const Rune* r, int n);
+
+
+// fullrune returns 1 if the string s of length n is long enough to be
+// decoded by chartorune, and 0 otherwise. This does not guarantee
+// that the string contains a legal UTF encoding. This routine is used
+// by programs that obtain input one byte at a time and need to know
+// when a full rune has arrived.
+
+int fullrune(const char* s, int n);
+
+// The following routines are analogous to the corresponding string
+// routines with "utf" substituted for "str", and "rune" substituted
+// for "chr".
+
+// utflen returns the number of runes that are represented by the UTF
+// string s. (cf. strlen)
+
+int utflen(const char* s);
+
+
+// utfnlen returns the number of complete runes that are represented
+// by the first n bytes of the UTF string s. If the last few bytes of
+// the string contain an incompletely coded rune, utfnlen will not
+// count them; in this way, it differs from utflen, which includes
+// every byte of the string. (cf. strnlen)
+
+int utfnlen(const char* s, long n);
+
+
+// utfrune returns a pointer to the first occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string. The NULL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strchr)
+
+/*const*/ char* utfrune(const char* s, Rune r);
+
+
+// utfrrune returns a pointer to the last occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string. The NULL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strrchr)
+
+/*const*/ char* utfrrune(const char* s, Rune r);
+
+
+// utfutf returns a pointer to the first occurrence of the UTF string
+// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
+// null string, utfutf returns s1. (cf. strstr)
+
+const char* utfutf(const char* s1, const char* s2);
+
+
+// utfecpy copies UTF sequences until a null sequence has been copied,
+// but writes no sequences beyond es1. If any sequences are copied,
+// s1 is terminated by a null sequence, and a pointer to that sequence
+// is returned. Otherwise, the original s1 is returned. (cf. strecpy)
+
+char* utfecpy(char *s1, char *es1, const char *s2);
+
+
+
+// These functions are rune-string analogues of the corresponding
+// functions in strcat (3).
+//
+// These routines first appeared in Plan 9.
+// SEE ALSO
+// memmove (3)
+// rune (3)
+// strcat (2)
+//
+// BUGS: The outcome of overlapping moves varies among implementations.
+
+Rune* runestrcat(Rune* s1, const Rune* s2);
+Rune* runestrncat(Rune* s1, const Rune* s2, long n);
+
+const Rune* runestrchr(const Rune* s, Rune c);
+
+int runestrcmp(const Rune* s1, const Rune* s2);
+int runestrncmp(const Rune* s1, const Rune* s2, long n);
+
+Rune* runestrcpy(Rune* s1, const Rune* s2);
+Rune* runestrncpy(Rune* s1, const Rune* s2, long n);
+Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2);
+
+Rune* runestrdup(const Rune* s);
+
+const Rune* runestrrchr(const Rune* s, Rune c);
+long runestrlen(const Rune* s);
+const Rune* runestrstr(const Rune* s1, const Rune* s2);
+
+
+
+// The following routines test types and modify cases for Unicode
+// characters. Unicode defines some characters as letters and
+// specifies three cases: upper, lower, and title. Mappings among the
+// cases are also defined, although they are not exhaustive: some
+// upper case letters have no lower case mapping, and so on. Unicode
+// also defines several character properties, a subset of which are
+// checked by these routines. These routines are based on Unicode
+// version 3.0.0.
+//
+// NOTE: The routines are implemented in C, so the boolean functions
+// (e.g., isupperrune) return 0 for false and 1 for true.
+//
+//
+// toupperrune, tolowerrune, and totitlerune are the Unicode case
+// mappings. These routines return the character unchanged if it has
+// no defined mapping.
+
+Rune toupperrune(Rune r);
+Rune tolowerrune(Rune r);
+Rune totitlerune(Rune r);
+
+
+// isupperrune tests for upper case characters, including Unicode
+// upper case letters and targets of the toupper mapping. islowerrune
+// and istitlerune are defined analogously.
+
+int isupperrune(Rune r);
+int islowerrune(Rune r);
+int istitlerune(Rune r);
+
+
+// isalpharune tests for Unicode letters; this includes ideographs in
+// addition to alphabetic characters.
+
+int isalpharune(Rune r);
+
+
+// isdigitrune tests for digits. Non-digit numbers, such as Roman
+// numerals, are not included.
+
+int isdigitrune(Rune r);
+
+
+// isspacerune tests for whitespace characters, including "C" locale
+// whitespace, Unicode defined whitespace, and the "zero-width
+// non-break space" character.
+
+int isspacerune(Rune r);
+
+
+// (The comments in this file were copied from the manpage files rune.3,
+// isalpharune.3, and runestrcat.3. Some formatting changes were also made
+// to conform to Google style. /JRM 11/11/05)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lib9/utf/utfdef.h b/src/lib9/utf/utfdef.h
new file mode 100644
index 000000000..adc6d95fb
--- /dev/null
+++ b/src/lib9/utf/utfdef.h
@@ -0,0 +1,28 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 1998-2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+#define uchar _utfuchar
+#define ushort _utfushort
+#define uint _utfuint
+#define ulong _utfulong
+#define vlong _utfvlong
+#define uvlong _utfuvlong
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#define nelem(x) (sizeof(x)/sizeof((x)[0]))
+#define nil ((void*)0)
diff --git a/src/lib9/utf/utfecpy.c b/src/lib9/utf/utfecpy.c
new file mode 100644
index 000000000..d6dc091c4
--- /dev/null
+++ b/src/lib9/utf/utfecpy.c
@@ -0,0 +1,36 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+char*
+utfecpy(char *to, char *e, const char *from)
+{
+ char *end;
+
+ if(to >= e)
+ return to;
+ end = memccpy(to, from, '\0', e - to);
+ if(end == nil){
+ end = e-1;
+ while(end>to && (*--end&0xC0)==0x80)
+ ;
+ *end = '\0';
+ }else{
+ end--;
+ }
+ return end;
+}
diff --git a/src/lib9/utf/utflen.c b/src/lib9/utf/utflen.c
new file mode 100644
index 000000000..45653d540
--- /dev/null
+++ b/src/lib9/utf/utflen.c
@@ -0,0 +1,38 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+int
+utflen(const char *s)
+{
+ int c;
+ long n;
+ Rune rune;
+
+ n = 0;
+ for(;;) {
+ c = *(uchar*)s;
+ if(c < Runeself) {
+ if(c == 0)
+ return n;
+ s++;
+ } else
+ s += chartorune(&rune, s);
+ n++;
+ }
+ return 0;
+}
diff --git a/src/lib9/utf/utfnlen.c b/src/lib9/utf/utfnlen.c
new file mode 100644
index 000000000..d673c8290
--- /dev/null
+++ b/src/lib9/utf/utfnlen.c
@@ -0,0 +1,41 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+int
+utfnlen(const char *s, long m)
+{
+ int c;
+ long n;
+ Rune rune;
+ const char *es;
+
+ es = s + m;
+ for(n = 0; s < es; n++) {
+ c = *(uchar*)s;
+ if(c < Runeself){
+ if(c == '\0')
+ break;
+ s++;
+ continue;
+ }
+ if(!fullrune(s, es-s))
+ break;
+ s += chartorune(&rune, s);
+ }
+ return n;
+}
diff --git a/src/lib9/utf/utfrrune.c b/src/lib9/utf/utfrrune.c
new file mode 100644
index 000000000..95d2a9d8a
--- /dev/null
+++ b/src/lib9/utf/utfrrune.c
@@ -0,0 +1,47 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+/* const - removed for go code */
+char*
+utfrrune(const char *s, Rune c)
+{
+ long c1;
+ Rune r;
+ const char *s1;
+
+ if(c < Runesync) /* not part of utf sequence */
+ return strrchr(s, c);
+
+ s1 = 0;
+ for(;;) {
+ c1 = *(uchar*)s;
+ if(c1 < Runeself) { /* one byte rune */
+ if(c1 == 0)
+ return (char*)s1;
+ if(c1 == c)
+ s1 = s;
+ s++;
+ continue;
+ }
+ c1 = chartorune(&r, s);
+ if(r == c)
+ s1 = s;
+ s += c1;
+ }
+ return 0;
+}
diff --git a/src/lib9/utf/utfrune.c b/src/lib9/utf/utfrune.c
new file mode 100644
index 000000000..b4017d26c
--- /dev/null
+++ b/src/lib9/utf/utfrune.c
@@ -0,0 +1,46 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+/* const - removed for go code */
+char*
+utfrune(const char *s, Rune c)
+{
+ long c1;
+ Rune r;
+ int n;
+
+ if(c < Runesync) /* not part of utf sequence */
+ return strchr(s, c);
+
+ for(;;) {
+ c1 = *(uchar*)s;
+ if(c1 < Runeself) { /* one byte rune */
+ if(c1 == 0)
+ return 0;
+ if(c1 == c)
+ return (char*)s;
+ s++;
+ continue;
+ }
+ n = chartorune(&r, s);
+ if(r == c)
+ return (char*)s;
+ s += n;
+ }
+ return 0;
+}
diff --git a/src/lib9/utf/utfutf.c b/src/lib9/utf/utfutf.c
new file mode 100644
index 000000000..ec4923165
--- /dev/null
+++ b/src/lib9/utf/utfutf.c
@@ -0,0 +1,42 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <string.h>
+#include "utf.h"
+#include "utfdef.h"
+
+
+/*
+ * Return pointer to first occurrence of s2 in s1,
+ * 0 if none
+ */
+const
+char*
+utfutf(const char *s1, const char *s2)
+{
+ const char *p;
+ long f, n1, n2;
+ Rune r;
+
+ n1 = chartorune(&r, s2);
+ f = r;
+ if(f <= Runesync) /* represents self */
+ return strstr(s1, s2);
+
+ n2 = strlen(s2);
+ for(p=s1; (p=utfrune(p, f)) != 0; p+=n1)
+ if(strncmp(p, s2, n2) == 0)
+ return p;
+ return 0;
+}