summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/localedef/Makefile15
-rw-r--r--usr/src/cmd/localedef/ctype.c6
-rw-r--r--usr/src/cmd/localedef/data/ctype.sh46
3 files changed, 61 insertions, 6 deletions
diff --git a/usr/src/cmd/localedef/Makefile b/usr/src/cmd/localedef/Makefile
index a47d92f3e8..92c5687f37 100644
--- a/usr/src/cmd/localedef/Makefile
+++ b/usr/src/cmd/localedef/Makefile
@@ -31,7 +31,7 @@ LDLIBS += -lavl
YFLAGS = -d -b parser
CLEANFILES = $(OBJS) parser.tab.c parser.tab.h
CLEANFILES += \
- UTF-8.cm \
+ UTF-8.cm UTF-8.ct \
8859-1.cm 8859-2.cm 8859-3.cm 8859-4.cm \
8859-5.cm 8859-5.cm 8859-6.cm 8859-7.cm \
8859-8.cm 8859-9.cm 8859-9.cm 8859-10.cm \
@@ -179,6 +179,8 @@ UTF_8_LOCALES = \
vi_VN \
zh_CN zh_HK zh_MO zh_SG zh_TW
+UTF8SRCS = $(UTF_8_LOCALES:%=data/%.UTF-8.src)
+
LOCNAMES = \
$(ISO8859_1_LOCALES:%=%.ISO8859-1) \
$(ISO8859_2_LOCALES:%=%.ISO8859-2) \
@@ -251,8 +253,12 @@ locale $(ROOTLOCDIRS) $(ROOTCATDIRS):
include ../Makefile.targ
-locale/%.UTF-8/stamp: data/%.UTF-8.src UTF-8.cm locale $(PROG)
- ./$(PROG) -U -i $< -f UTF-8.cm $(@D)
+# Strip the LC_CTYPE section from the UTF-8 locale sources and replace it
+# with the shared UTF-8.ct generated by data/ctype.sh
+locale/%.UTF-8/stamp: data/%.UTF-8.src UTF-8.cm \
+ UTF-8.ct locale $(PROG)
+ $(SED) '/^LC_CTYPE/,/^END LC_CTYPE/d;$$r UTF-8.ct' $< | \
+ ./$(PROG) -U -f UTF-8.cm $(@D)
$(TOUCH) $@
locale/%.ISO8859-1/stamp: data/%.UTF-8.src 8859-1.cm locale $(PROG)
./$(PROG) -U -i $< -f 8859-1.cm $(@D)
@@ -295,6 +301,9 @@ locale/%/$(DTIME): locale/%/stamp
UTF-8.cm: data/UTF-8.cm
$(LN) -sf data/UTF-8.cm $@
+UTF-8.ct: $(UTF8SRCS)
+ $(SH) data/ctype.sh $(UTF8SRCS) > $@
+
%.cm: data/%.TXT UTF-8.cm
$(RM) $@
$(PERL) data/convert_map.pl $< > $@
diff --git a/usr/src/cmd/localedef/ctype.c b/usr/src/cmd/localedef/ctype.c
index 5ee65e21e6..6b2090ab9a 100644
--- a/usr/src/cmd/localedef/ctype.c
+++ b/usr/src/cmd/localedef/ctype.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2010,2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -321,8 +321,8 @@ dump_ctype(void)
ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
last_ct = ctn;
}
- if (ctn->toupper == 0) {
- last_up = NULL;
+ if (ctn->tolower == 0) {
+ last_lo = NULL;
} else if ((last_lo != NULL) &&
(last_lo->tolower + 1 == ctn->tolower)) {
lo[rl.maplower_ext_nranges-1].max = wc;
diff --git a/usr/src/cmd/localedef/data/ctype.sh b/usr/src/cmd/localedef/data/ctype.sh
new file mode 100644
index 0000000000..0d55cf92c6
--- /dev/null
+++ b/usr/src/cmd/localedef/data/ctype.sh
@@ -0,0 +1,46 @@
+#! /usr/bin/sh
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+#
+
+# Combine the LC_CTYPE classes from all .UTF-8.src files to be compiled by
+# localedef into one LC_CTYPE/LCL_DATA used by all locales, so that they all
+# share the same case mapping tables, character classes, etc. This is not a
+# general-purpose parser, but is good enough for the stock files from CLDR.
+
+printf "\nLC_CTYPE\n"
+
+for i in upper lower alpha space cntrl graph print punct digit xdigit blank \
+ toupper tolower; do
+ # sed can't match both ends of a range on the same line, so we make a
+ # single-line class look like a valid multiline one by duplicating it
+ sed -E "/^$i.*>$/ {
+ s,$,;/,
+ h
+ s,^$i(.*>);/$,\1,
+ H
+ x
+ }" $@ |\
+ sed -E -n "/^$i/,/(>|\))$/ {
+ s,^$i,,
+ s,(>|\))$,\1;/,
+ /^$/d
+ p
+ }" |\
+ sort -u |\
+ sed -E "1 s,^,$i,;$ s,(>|\));/,\1,"
+done
+
+printf "\nEND LC_CTYPE\n"