diff options
Diffstat (limited to 'usr/src/lib/libast/common/port/lc.c')
-rw-r--r-- | usr/src/lib/libast/common/port/lc.c | 832 |
1 files changed, 832 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/port/lc.c b/usr/src/lib/libast/common/port/lc.c new file mode 100644 index 0000000000..e632d00182 --- /dev/null +++ b/usr/src/lib/libast/common/port/lc.c @@ -0,0 +1,832 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped + +/* + * locale state implementation + */ + +#include "lclib.h" + +#include <ctype.h> + +#if _WINIX + +#include <ast_windows.h> + +#define LANG_CHINESE_SIMPLIFIED LANG_CHINESE +#define LANG_CHINESE_TRADITIONAL LANG_CHINESE +#define LANG_NORWEGIAN_BOKMAL LANG_NORWEGIAN +#define LANG_NORWEGIAN_NYNORSK LANG_NORWEGIAN +#define LANG_SERBO_CROATIAN LANG_CROATIAN + +#define CTRY_CZECH_REPUBLIC CTRY_CZECH + +#define SUBLANG_CHINESE_SIMPLIFIED_CHINA SUBLANG_CHINESE_SIMPLIFIED +#define SUBLANG_CHINESE_SIMPLIFIED_HONG_KONG SUBLANG_CHINESE_HONGKONG +#define SUBLANG_CHINESE_SIMPLIFIED_SINGAPORE SUBLANG_CHINESE_SINGAPORE +#define SUBLANG_CHINESE_TRADITIONAL_TAIWAN SUBLANG_CHINESE_TRADITIONAL +#define SUBLANG_DUTCH_NETHERLANDS_ANTILLES SUBLANG_DUTCH +#define SUBLANG_DUTCH_BELGIUM SUBLANG_DUTCH_BELGIAN +#define SUBLANG_ENGLISH_AUSTRALIA SUBLANG_ENGLISH_AUS +#define SUBLANG_ENGLISH_CANADA SUBLANG_ENGLISH_CAN +#define SUBLANG_ENGLISH_IRELAND SUBLANG_ENGLISH_EIRE +#define SUBLANG_ENGLISH_NEW_ZEALAND SUBLANG_ENGLISH_NZ +#define SUBLANG_ENGLISH_TRINIDAD_TOBAGO SUBLANG_ENGLISH_CARIBBEAN +#define SUBLANG_ENGLISH_UNITED_KINGDOM SUBLANG_ENGLISH_UK +#define SUBLANG_ENGLISH_UNITED_STATES SUBLANG_ENGLISH_US +#define SUBLANG_FRENCH_BELGIUM SUBLANG_FRENCH_BELGIAN +#define SUBLANG_FRENCH_CANADA SUBLANG_FRENCH_CANADIAN +#define SUBLANG_FRENCH_SWITZERLAND SUBLANG_FRENCH_SWISS +#define SUBLANG_GERMAN_AUSTRIA SUBLANG_GERMAN_AUSTRIAN +#define SUBLANG_GERMAN_SWITZERLAND SUBLANG_GERMAN_SWISS +#define SUBLANG_ITALIAN_SWITZERLAND SUBLANG_ITALIAN_SWISS +#define SUBLANG_NORWEGIAN_BOKMAL_NORWAY SUBLANG_NORWEGIAN_BOKMAL +#define SUBLANG_NORWEGIAN_NORWAY SUBLANG_NORWEGIAN_BOKMAL +#define SUBLANG_NORWEGIAN_NYNORSK_NORWAY SUBLANG_NORWEGIAN_NYNORSK +#define SUBLANG_PORTUGUESE_BRAZIL SUBLANG_PORTUGUESE_BRAZILIAN + +#endif + +#include "lctab.h" + +static Lc_numeric_t default_numeric = { '.', -1 }; + +static Lc_t default_lc = +{ + "C", + "POSIX", + &language[0], + &territory[0], + &charset[0], + 0, + LC_default|LC_checked|LC_local, + 0, + { + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, (void*)&default_numeric }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 }, + { &default_lc, 0, 0 } + } +}; + +static Lc_numeric_t debug_numeric = { ',', '.' }; + +static Lc_t debug_lc = +{ + "debug", + "debug", + &language[1], + &territory[1], + &charset[0], + 0, + LC_debug|LC_checked|LC_local, + 0, + { + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, (void*)&debug_numeric }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 }, + { &debug_lc, 0, 0 } + }, + &default_lc +}; + +static Lc_t* lcs = &debug_lc; + +Lc_t* locales[] = +{ + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc, + &default_lc +}; + +/* + * return the internal category index for category + */ + +int +lcindex(int category, int min) +{ + switch (category) + { + case LC_ALL: return min ? -1 : AST_LC_ALL; + case LC_ADDRESS: return AST_LC_ADDRESS; + case LC_COLLATE: return AST_LC_COLLATE; + case LC_CTYPE: return AST_LC_CTYPE; + case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION; + case LC_MEASUREMENT: return AST_LC_MEASUREMENT; + case LC_MESSAGES: return AST_LC_MESSAGES; + case LC_MONETARY: return AST_LC_MONETARY; + case LC_NAME: return AST_LC_NAME; + case LC_NUMERIC: return AST_LC_NUMERIC; + case LC_PAPER: return AST_LC_PAPER; + case LC_TELEPHONE: return AST_LC_TELEPHONE; + case LC_TIME: return AST_LC_TIME; + case LC_XLITERATE: return AST_LC_XLITERATE; + } + return -1; +} + +/* + * return the first category table entry + */ + +Lc_category_t* +lccategories(void) +{ + return &categories[0]; +} + +/* + * return the current info for category + */ + +Lc_info_t* +lcinfo(register int category) +{ + if ((category = lcindex(category, 0)) < 0) + return 0; + return LCINFO(category); +} + +/* + * return 1 if s matches the alternation pattern p + * if minimum!=0 then at least that many chars must match + * if standard!=0 and s[0] is a digit leading non-digits are ignored in p + */ + +static int +match(const char* s, register const char* p, int minimum, int standard) +{ + register const char* t; + const char* x; + int w; + int z; + + z = 0; + do + { + t = s; + if (standard) + { + if (isdigit(*t)) + while (*p && !isdigit(*p)) + p++; + else if (isdigit(*p)) + while (*t && !isdigit(*t)) + t++; + } + if (*p) + { + w = 0; + x = p; + while (*p && *p != '|') + { + if (!*t || *t == ',') + break; + else if (*t == *p) + /*ok*/; + else if (*t == '-') + { + if (standard && isdigit(*p)) + { + t++; + continue; + } + while (*p && *p != '-') + p++; + if (!*p) + break; + } + else if (*p == '-') + { + if (standard && isdigit(*t)) + { + p++; + continue; + } + w = 1; + while (*t && *t != '-') + t++; + if (!*t) + break; + } + else + break; + t++; + p++; + } + if ((!*t || *t == ',') && (!*p || *p == '|' || w)) + return p - x; + if (minimum && z < (p - x) && (p - x) >= minimum) + z = p - x; + } + while (*p && *p != '|') + p++; + } while (*p++); + return z; +} + +/* + * return 1 if s matches the charset names in cp + */ + +static int +match_charset(register const char* s, register const Lc_charset_t* cp) +{ + return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1); +} + +/* + * low level for lccanon + */ + +static size_t +canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz) +{ + register int c; + register int u; + register char* s; + register char* e; + register const char* t; + + if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose))) + flags |= LC_abbreviated; + s = buf; + e = &buf[siz - 3]; + if (lp) + { + if (lp->flags & (LC_debug|LC_default)) + { + for (t = lp->code; s < e && (*s = *t++); s++); + *s++ = 0; + return s - buf; + } + if (flags & LC_verbose) + { + u = 1; + t = lp->name; + while (s < e && (c = *t++)) + { + if (u) + { + u = 0; + c = toupper(c); + } + else if (!isalnum(c)) + u = 1; + *s++ = c; + } + } + else + for (t = lp->code; s < e && (*s = *t++); s++); + } + if (s < e) + { + if (tp && tp != &territory[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code))) + { + if (lp) + *s++ = '_'; + if (flags & LC_verbose) + { + u = 1; + t = tp->name; + while (s < e && (c = *t++) && c != '|') + { + if (u) + { + u = 0; + c = toupper(c); + } + else if (!isalnum(c)) + u = 1; + *s++ = c; + } + } + else + for (t = tp->code; s < e && (*s = toupper(*t++)); s++); + } + if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e) + { + *s++ = '.'; + for (t = cp->code; s < e && (c = *t++); s++) + { + if (islower(c)) + c = toupper(c); + *s = c; + } + } + for (c = '@'; ap && s < e; ap = ap->next) + if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default)) + { + *s++ = c; + c = ','; + for (t = ap->attribute->name; s < e && (*s = *t++); s++); + } + } + *s++ = 0; + return s - buf; +} + +/* + * generate a canonical locale name in buf + */ + +size_t +lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz) +{ + if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default)))) + { +#if _WINIX + char lang[64]; + char code[64]; + char ctry[64]; + + if (lc->index && + GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) && + GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry))) + { + if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code))) + code[0] = 0; + if (!lc->charset || !lc->charset->ms) + return sfsprintf(buf, siz, "%s_%s", lang, ctry); + else if (streq(lc->charset->ms, code)) + return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code); + else + return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms); + } +#endif + buf[0] = '-'; + buf[1] = 0; + return 0; + } + return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz); +} + +/* + * make an Lc_t from a locale name + */ + +Lc_t* +lcmake(const char* name) +{ + register int c; + register char* s; + register char* e; + register const char* t; + const char* a; + char* w; + char* language_name; + char* territory_name; + char* charset_name; + char* attributes_name; + Lc_t* lc; + const Lc_map_t* mp; + const Lc_language_t* lp; + const Lc_territory_t* tp; + const Lc_territory_t* tpb; + const Lc_territory_t* primary; + const Lc_charset_t* cp; + const Lc_charset_t* ppa; + const Lc_attribute_t* ap; + Lc_attribute_list_t* ai; + Lc_attribute_list_t* al; + int i; + int n; + int z; + char buf[PATH_MAX / 2]; + char tmp[PATH_MAX / 2]; + + if (!(t = name) || !*t) + return &default_lc; + for (lc = lcs; lc; lc = lc->next) + if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name)) + return lc; + for (mp = map; mp->code; mp++) + if (streq(t, mp->code)) + { + lp = mp->language; + tp = mp->territory; + cp = mp->charset; + if (!mp->attribute) + al = 0; + else if (al = newof(0, Lc_attribute_list_t, 1, 0)) + al->attribute = mp->attribute; + goto mapped; + } + language_name = buf; + territory_name = charset_name = attributes_name = 0; + s = buf; + e = &buf[sizeof(buf)-2]; + a = 0; + n = 0; + while (s < e && (c = *t++)) + { + if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n) + { + while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) + if (!c) + break; + if (isalnum(c) && !n) + *s++ = '-'; + else + { + n = 0; + if (!a) + { + a = t - 1; + while (c && c != '_' && c != '.' && c != '@') + c = *t++; + if (!c) + break; + } + } + } + if (c == '_' && !territory_name) + { + *s++ = 0; + territory_name = s; + } + else if (c == '.' && !charset_name) + { + *s++ = 0; + charset_name = s; + } + else if (c == '@' && !attributes_name) + { + *s++ = 0; + attributes_name = s; + } + else + { + if (isupper(c)) + c = tolower(c); + *s++ = c; + } + } + if ((t = a) && s < e) + { + if (attributes_name) + *s++ = ','; + else + { + *s++ = 0; + attributes_name = s; + } + while (s < e && (c = *t++)) + { + if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n) + { + while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) + if (!c) + break; + if (isalnum(c) && !n) + *s++ = '-'; + else + n = 0; + } + if (c == '_' || c == '.' || c == '@') + break; + if (isupper(c)) + c = tolower(c); + *s++ = c; + } + } + *s = 0; + tp = 0; + cp = ppa = 0; + al = 0; + + /* + * language + */ + + n = strlen(s = language_name); + if (n == 2) + for (lp = language; lp->code && !streq(s, lp->code); lp++); + else if (n == 3) + { + for (lp = language; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++); + if (!lp->code) + { + c = s[2]; + s[2] = 0; + for (lp = language; lp->code && !streq(s, lp->code); lp++); + s[2] = c; + if (lp->code) + n = 1; + } + } + else + lp = 0; + if (!lp || !lp->code) + { + for (lp = language; lp->code && !match(s, lp->name, 0, 0); lp++); + if (!lp || !lp->code) + { + if (!territory_name) + { + if (n == 2) + for (tp = territory; tp->code && !streq(s, tp->code); tp++); + else + { + z = 0; + tpb = 0; + for (tp = territory; tp->name; tp++) + if ((i = match(s, tp->name, 3, 0)) > z) + { + tpb = tp; + if ((z = i) == n) + break; + } + if (tpb) + tp = tpb; + } + if (tp->code) + lp = tp->languages[0]; + } + if (!lp || !lp->code) + { + /* + * name not in the tables so let + * _ast_setlocale() and/or setlocale() + * handle the validity checks + */ + + s = (char*)name; + z = strlen(s) + 1; + if (!(lp = newof(0, Lc_language_t, 1, z))) + return 0; + name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1); + memcpy((char*)lp->code, s, z - 1); + tp = &territory[0]; + cp = ((Lc_language_t*)lp)->charset = &charset[0]; + al = 0; + goto override; + } + } + } + + /* + * territory + */ + + if (!tp || !tp->code) + { + if (!(s = territory_name)) + { + n = 0; + primary = 0; + for (tp = territory; tp->code; tp++) + if (tp->languages[0] == lp) + { + if (tp->flags & LC_primary) + { + n = 1; + primary = tp; + break; + } + n++; + primary = tp; + } + if (n == 1) + tp = primary; + s = (char*)lp->code; + } + if (!tp || !tp->code) + { + n = strlen(s); + if (n == 2) + { + for (tp = territory; tp->code; tp++) + if (streq(s, tp->code)) + { + for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); + if (i >= elementsof(tp->languages)) + tp = 0; + break; + } + } + else + { + for (tp = territory; tp->code; tp++) + if (match(s, tp->name, 3, 0)) + { + for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); + if (i < elementsof(tp->languages)) + break; + } + } + if (tp && !tp->code) + tp = 0; + } + } + + /* + * attributes -- done here to catch misplaced charset references + */ + + if (s = attributes_name) + { + do + { + for (w = s; *s && *s != ','; s++); + c = *s; + *s = 0; + if (!(cp = lp->charset) || !match_charset(w, cp)) + for (cp = charset; cp->code; cp++) + if (match_charset(w, cp)) + { + ppa = cp; + break; + } + if (!cp->code) + { + for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++) + if (match(w, ap->name, 5, 0)) + { + if (ai = newof(0, Lc_attribute_list_t, 1, 0)) + { + ai->attribute = ap; + ai->next = al; + al = ai; + } + break; + } + if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1))) + { + ai = (Lc_attribute_list_t*)(ap + 1); + strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w); + ai->attribute = ap; + ai->next = al; + al = ai; + } + } + *s = c; + } while (*s++); + } + + /* + * charset + */ + + if (s = charset_name) + for (cp = charset; cp->code; cp++) + if (match_charset(s, cp)) + break; + if (!cp || !cp->code) + cp = ppa ? ppa : lp->charset; + mapped: + z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp)); + + /* + * add to the list of possibly active locales + */ + + override: + n = strlen(name) + 1; + if (!(lc = newof(0, Lc_t, 1, n + z))) + return 0; + strcpy((char*)(lc->name = (const char*)(lc + 1)), name); + strcpy((char*)(lc->code = lc->name + n), s); + lc->language = lp ? lp : &language[0]; + lc->territory = tp ? tp : &territory[0]; + lc->charset = cp ? cp : &charset[0]; + lc->attributes = al; + for (i = 0; i < elementsof(lc->info); i++) + lc->info[i].lc = lc; +#if _WINIX + n = SUBLANG_DEFAULT; + if (tp) + for (i = 0; i < elementsof(tp->languages); i++) + if (lp == tp->languages[i]) + { + n = tp->indices[i]; + break; + } + lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT); +#endif + lc->next = lcs; + lcs = lc; + return lc; +} + +/* + * return an Lc_t* for each locale in the tables + * one Lc_t is allocated on the first call with lc==0 + * this is freed when 0 returned + * the return value is not part of the lcmake() cache + */ + +typedef struct Lc_scan_s +{ + Lc_t lc; + Lc_attribute_list_t list; + int territory; + int language; + int attribute; + char buf[256]; +} Lc_scan_t; + +Lc_t* +lcscan(Lc_t* lc) +{ + register Lc_scan_t* ls; + + if (!(ls = (Lc_scan_t*)lc)) + { + if (!(ls = newof(0, Lc_scan_t, 1, 0))) + return 0; + ls->lc.code = ls->lc.name = ls->buf; + ls->territory = -1; + ls->language = elementsof(ls->lc.territory->languages); + ls->attribute = elementsof(ls->lc.language->attributes); + } + if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute])) + { + if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language])) + { + if (++ls->territory >= (elementsof(territory) - 1)) + { + free(ls); + return 0; + } + ls->lc.territory = &territory[ls->territory]; + ls->lc.language = ls->lc.territory->languages[ls->language = 0]; + } + if (ls->lc.language) + { + ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &charset[0]; + ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0]; + } + else + { + ls->lc.charset = &charset[0]; + ls->list.attribute = 0; + } + } + ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0; +#if _WINIX + if (!ls->lc.language || !ls->lc.language->index) + ls->lc.index = 0; + else + { + if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) && + (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language]))) + ls->lc.index = SUBLANG_DEFAULT; + ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT); + } +#endif + canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf)); + return (Lc_t*)ls; +} |