diff options
author | chin <none@none> | 2007-08-17 12:01:52 -0700 |
---|---|---|
committer | chin <none@none> | 2007-08-17 12:01:52 -0700 |
commit | da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968 (patch) | |
tree | 5280d3b78e289fe9551371ab6e7f15ef9944ea14 /usr/src/lib/libast/common/regex/regcoll.c | |
parent | 073dbf9103ef2a2b05d8a16e2d26db04e0374b0e (diff) | |
download | illumos-gate-da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968.tar.gz |
6437624 RFE: Add ksh93 (as /usr/bin/ksh93) and libshell.so to OS/Net
6505835 AST tools and library (libpp) required for creating l10n messages for ksh93
PSARC/2006/550 Korn Shell 93 Integration
PSARC/2006/587 /etc/ksh.kshrc for ksh93
PSARC/2007/035 ksh93 Amendments
Contributed by Roland Mainz <roland.mainz@nrubsig.org>
--HG--
rename : usr/src/lib/libcmd/common/mapfile-vers => deleted_files/usr/src/lib/libcmd/common/mapfile-vers
rename : usr/src/lib/libcmd/common/placeholder.c => deleted_files/usr/src/lib/libcmd/common/placeholder.c
Diffstat (limited to 'usr/src/lib/libast/common/regex/regcoll.c')
-rw-r--r-- | usr/src/lib/libast/common/regex/regcoll.c | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/regex/regcoll.c b/usr/src/lib/libast/common/regex/regcoll.c new file mode 100644 index 0000000000..29bb85ffc0 --- /dev/null +++ b/usr/src/lib/libast/common/regex/regcoll.c @@ -0,0 +1,297 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * regex collation symbol support + */ + +#include "reglib.h" + +#include <ccode.h> + +#ifndef UCS_BYTE +#define UCS_BYTE 1 +#endif + +#include "ucs_names.h" + +typedef struct Ucs_map_s +{ + Ucs_attr_t attr[3]; + Ucs_code_t code; + const char* name; + Dtlink_t link; + struct Ucs_map_s* next; +} Ucs_map_t; + +#define setattr(a,i) ((a)[(i)>>5]|=(1<<((i)&((1<<5)-1)))) +#define tstattr(a,i) ((a)[(i)>>5]&(1<<((i)&((1<<5)-1)))) +#define clrattr(a,i) ((a)[(i)>>5]&=~(1<<((i)&((1<<5)-1)))) + +static struct Local_s +{ + int fatal; + Dt_t* attrs; + Dt_t* names; + Dtdisc_t dtdisc; +#if CC_NATIVE != CC_ASCII + unsigned char* a2n; +#endif +} local; + +/* + * initialize the writeable tables from the readonly data + * the tables are big enough to be concerned about text vs. data vs. bss + * UCS_BYTE==0 100K + * UCS_BYTE==1 20K + */ + +static int +initialize(void) +{ + register int i; + register Ucs_map_t* a; + register Ucs_map_t* w; + + if (local.fatal) + return -1; + local.dtdisc.link = offsetof(Ucs_map_t, link); + local.dtdisc.key = offsetof(Ucs_map_t, name); + local.dtdisc.size = -1; + if (!(w = (Ucs_map_t*)malloc(sizeof(Ucs_map_t) * (elementsof(ucs_attrs) + elementsof(ucs_names))))) + { + local.fatal = 1; + return -1; + } + if (!(local.attrs = dtopen(&local.dtdisc, Dttree))) + { + free(w); + local.fatal = 1; + return -1; + } + if (!(local.names = dtopen(&local.dtdisc, Dttree))) + { + free(w); + dtclose(local.attrs); + local.fatal = 1; + return -1; + } + for (i = 0; i < elementsof(ucs_attrs); i++, w++) + { + memcpy(w, &ucs_attrs[i], offsetof(Ucs_dat_t, table)); + w->name = ucs_strings[ucs_attrs[i].table] + ucs_attrs[i].index; + w->next = 0; + dtinsert(local.attrs, w); + } + for (i = 0; i < elementsof(ucs_names); i++, w++) + { + memcpy(w, &ucs_names[i], offsetof(Ucs_dat_t, table)); + w->name = ucs_strings[ucs_names[i].table] + ucs_names[i].index; + w->next = 0; + if (a = (Ucs_map_t*)dtsearch(local.names, w)) + { + while (a->next) + a = a->next; + a->next = w; + } + else + dtinsert(local.names, w); + } +#if CC_NATIVE != CC_ASCII + local.a2n = ccmap(CC_ASCII, CC_NATIVE); +#endif + return 0; +} + +/* + * return the collating symbol delimited by [c c], where c is either '=' or '.' + * s points to the first char after the initial [ + * if e!=0 it is set to point to the next char in s on return + * + * the collating symbol is converted to multibyte in <buf,size> + * the return value is: + * -1 syntax error or buf not large enough + * >=0 size with 0-terminated mb collation element + * or ligature value in buf + */ + +int +regcollate(register const char* s, char** e, char* buf, int size) +{ + register int c; + register char* u; + register char* b; + register char* x; + register Ucs_map_t* a; + Ucs_map_t* z; + const char* t; + const char* v; + int n; + int r; + int ul; + int term; + wchar_t w[2]; + Ucs_attr_t attr[3]; + + if (size < 2) + r = -1; + else if ((term = *s++) != '.' && term != '=') + { + s--; + r = -1; + } + else if (*s == term && *(s + 1) == ']') + r = -1; + else + { + t = s; + mbchar(s); + if ((n = (s - t)) == 1) + { + if (*s == term && *(s + 1) == ']') + { + s += 2; + r = -1; + } + else + { + if (!local.attrs && initialize()) + return -1; + attr[0] = attr[1] = attr[2] = 0; + ul = 0; + b = buf; + x = buf + size - 2; + r = 1; + s = t; + do + { + v = s; + u = b; + for (;;) + { + if (!(c = *s++)) + return -1; + if (c == term) + { + if (!(c = *s++)) + return -1; + if (c != term) + { + if (c != ']') + return -1; + r = -1; + break; + } + } + if (c == ' ' || c == '-' && u > b && *s != ' ' && *s != '-') + break; + if (isupper(c)) + c = tolower(c); + if (u > x) + break; + *u++ = c; + } + *u = 0; + if (a = (Ucs_map_t*)dtmatch(local.attrs, b)) + setattr(attr, a->code); + else + { + if (u < x) + *u++ = ' '; + if (b == buf) + { + if (isupper(*v)) + ul = UCS_UC; + else if (islower(*v)) + ul = UCS_LC; + } + b = u; + } + } while (r > 0); + if (b > buf && *(b - 1) == ' ') + b--; + *b = 0; + attr[0] &= ~((Ucs_attr_t)1); + if (ul) + { + if (tstattr(attr, UCS_UC) || tstattr(attr, UCS_LC)) + ul = 0; + else + setattr(attr, ul); + } + if (z = (Ucs_map_t*)dtmatch(local.names, buf)) + for(;;) + { + for (a = z; a; a = a->next) + if ((attr[0] & a->attr[0]) == attr[0] && (attr[1] & a->attr[1]) == attr[1] && (attr[2] & a->attr[2]) == attr[2]) + { + if (a->code <= 0xff) + { +#if CC_NATIVE != CC_ASCII + buf[0] = local.a2n[a->code]; +#else + buf[0] = a->code; +#endif + buf[r = 1] = 0; + ul = 0; + break; + } + w[0] = a->code; + w[1] = 0; + if ((r = wcstombs(buf, w, size)) > 0) + { + r--; + ul = 0; + } + break; + } + if (!ul) + break; + clrattr(attr, ul); + ul = 0; + } + } + if (r < 0) + { + if ((r = s - t - 2) > (size - 1)) + return -1; + memcpy(buf, t, r); + buf[r] = 0; + } + } + else if (*s++ != term || *s++ != ']') + { + s--; + r = -1; + } + else if (n > (size - 1)) + r = -1; + else + { + memcpy(buf, t, n); + buf[r = n] = 0; + } + } + if (e) + *e = (char*)s; + return r; +} |