diff options
author | Roger A. Faulkner <Roger.Faulkner@Oracle.COM> | 2010-04-03 09:48:44 -0700 |
---|---|---|
committer | Roger A. Faulkner <Roger.Faulkner@Oracle.COM> | 2010-04-03 09:48:44 -0700 |
commit | 3e14f97f673e8a630f076077de35afdd43dc1587 (patch) | |
tree | 9828b6b676f58d7b27dd28e00202d3f3461a5cd8 /usr/src/lib/libcmd/common/join.c | |
parent | 4f60987df4dcaa54a88b596f861fbf4f3382c65e (diff) | |
download | illumos-joyent-3e14f97f673e8a630f076077de35afdd43dc1587.tar.gz |
6939349 RFE: Update ksh93 to ast-ksh.2010-03-09
6877392 ksh93 regresses 'uniq -c' performance
6887363 Korn shell 93 sometimes mishandles return value of its child process
6900314 (while true ; do true|true ; done) hang in ioctl() with SIGTTOU
6904557 wc no longer counts number of bytes correctly
6904575 cut -d with multibyte character no longer works
6904597 paste -d no longer works with multibyte characters
6904780 /usr/bin/cksum changed output in snv_128
6904870 uniq -s does not skip multibyte characters correctly
6904878 join -t no longer works with multibyte char separator
6907460 EXIT trap handlers are sometimes executed twice
6909579 libast getopt solaris compatibility broken
6920072 ksh93 tail -f, with unconditional .25s sleep and line parsing, about 37x slower than cat
6932124 mktemp in ksh93 is broken
Contributed by Olga Kryzhanovska <olga.kryzhanovska@gmail.com>
Diffstat (limited to 'usr/src/lib/libcmd/common/join.c')
-rw-r--r-- | usr/src/lib/libcmd/common/join.c | 280 |
1 files changed, 229 insertions, 51 deletions
diff --git a/usr/src/lib/libcmd/common/join.c b/usr/src/lib/libcmd/common/join.c index 909a8e8a29..4ef3c40860 100644 --- a/usr/src/lib/libcmd/common/join.c +++ b/usr/src/lib/libcmd/common/join.c @@ -1,7 +1,7 @@ /*********************************************************************** * * * This software is part of the ast package * -* Copyright (c) 1992-2009 AT&T Intellectual Property * +* Copyright (c) 1992-2010 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * @@ -28,7 +28,7 @@ */ static const char usage[] = -"[-?\n@(#)$Id: join (AT&T Research) 2009-08-01 $\n]" +"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]" USAGE_LICENSE "[+NAME?join - relational database operator]" "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " @@ -93,6 +93,21 @@ USAGE_LICENSE #include <cmd.h> #include <sfdisc.h> +#if _hdr_wchar && _hdr_wctype && _lib_iswctype + +#include <wchar.h> +#include <wctype.h> + +#else + +#include <ctype.h> + +#ifndef iswspace +#define iswspace(x) isspace(x) +#endif + +#endif + #define C_FILE1 001 #define C_FILE2 002 #define C_COMMON 004 @@ -104,8 +119,15 @@ USAGE_LICENSE #define S_DELIM 1 #define S_SPACE 2 #define S_NL 3 +#define S_WIDE 4 -typedef struct +typedef struct Field_s +{ + char* beg; + char* end; +} Field_t; + +typedef struct File_s { Sfio_t* iop; char* name; @@ -118,10 +140,10 @@ typedef struct int spaces; int hit; int discard; - char** fieldlist; + Field_t* fields; } File_t; -typedef struct +typedef struct Join_s { unsigned char state[1<<CHAR_BIT]; Sfio_t* outfile; @@ -129,9 +151,12 @@ typedef struct int outmode; int ooutmode; char* nullfield; + char* delimstr; int delim; + int delimlen; int buffered; int ignorecase; + int mb; char* same; int samesize; void* context; @@ -147,10 +172,10 @@ done(register Join_t* jp) sfclose(jp->file[1].iop); if (jp->outlist) free(jp->outlist); - if (jp->file[0].fieldlist) - free(jp->file[0].fieldlist); - if (jp->file[1].fieldlist) - free(jp->file[1].fieldlist); + if (jp->file[0].fields) + free(jp->file[0].fields); + if (jp->file[1].fields) + free(jp->file[1].fields); if (jp->same) free(jp->same); free(jp); @@ -160,14 +185,20 @@ static Join_t* init(void) { register Join_t* jp; + register int i; + setlocale(LC_ALL, ""); if (jp = newof(0, Join_t, 1, 0)) { + if (jp->mb = mbwide()) + for (i = 0x80; i <= 0xff; i++) + jp->state[i] = S_WIDE; jp->state[' '] = jp->state['\t'] = S_SPACE; + jp->state['\n'] = S_NL; jp->delim = -1; jp->nullfield = 0; - if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) || - !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0))) + if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || + !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) { done(jp); return 0; @@ -265,10 +296,11 @@ getrec(Join_t* jp, int index, int discard) { register unsigned char* sp = jp->state; register File_t* fp = &jp->file[index]; - register char** ptr = fp->fieldlist; - register char** ptrmax = ptr + fp->maxfields; + register Field_t* field = fp->fields; + register Field_t* fieldmax = field + fp->maxfields; register char* cp; - register int n = 0; + register int n; + char* tp; if (sh_checksig(jp->context)) return 0; @@ -283,48 +315,148 @@ getrec(Join_t* jp, int index, int discard) } fp->recptr = cp; fp->reclen = sfvalue(fp->iop); - if (jp->delim=='\n') /* handle new-line delimiter specially */ + if (jp->delim == '\n') /* handle new-line delimiter specially */ { - *ptr++ = cp; + field->beg = cp; cp += fp->reclen; + field->end = cp - 1; + field++; } - else while (n!=S_NL) /* separate into fields */ - { - if (ptr >= ptrmax) - { - n = 2*fp->maxfields; - fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0); - ptr = fp->fieldlist + fp->maxfields; - fp->maxfields = n; - ptrmax = fp->fieldlist+n; - } - *ptr++ = cp; - if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE) + else + do /* separate into fields */ { - fp->spaces = 1; - while (sp[*(unsigned char*)cp++]==S_SPACE); - cp--; - } - while ((n=sp[*(unsigned char*)cp++])==0); - } - *ptr = cp; - fp->nfields = ptr - fp->fieldlist; - if ((n=fp->field) < fp->nfields) + if (field >= fieldmax) + { + n = 2 * fp->maxfields; + fp->fields = newof(fp->fields, Field_t, n + 1, 0); + field = fp->fields + fp->maxfields; + fp->maxfields = n; + fieldmax = fp->fields + n; + } + field->beg = cp; + if (jp->delim == -1) + { + switch (sp[*(unsigned char*)cp]) + { + case S_SPACE: + cp++; + break; + case S_WIDE: + tp = cp; + if (iswspace(mbchar(tp))) + { + cp = tp; + break; + } + /*FALLTHROUGH*/ + default: + goto next; + } + fp->spaces = 1; + if (jp->mb) + for (;;) + { + switch (sp[*(unsigned char*)cp++]) + { + case S_SPACE: + continue; + case S_WIDE: + tp = cp - 1; + if (iswspace(mbchar(tp))) + { + cp = tp; + continue; + } + break; + } + break; + } + else + while (sp[*(unsigned char*)cp++]==S_SPACE); + cp--; + } + next: + if (jp->mb) + { + for (;;) + { + tp = cp; + switch (n = sp[*(unsigned char*)cp++]) + { + case 0: + continue; + case S_WIDE: + cp--; + n = mbchar(cp); + if (n == jp->delim) + { + n = S_DELIM; + break; + } + if (jp->delim == -1 && iswspace(n)) + { + n = S_SPACE; + break; + } + continue; + } + break; + } + field->end = tp; + } + else + { + while (!(n = sp[*(unsigned char*)cp++])); + field->end = cp - 1; + } + field++; + } while (n != S_NL); + fp->nfields = field - fp->fields; + if ((n = fp->field) < fp->nfields) { - cp = fp->fieldlist[n]; + cp = fp->fields[n].beg; /* eliminate leading spaces */ if (fp->spaces) { - while (sp[*(unsigned char*)cp++]==S_SPACE); + if (jp->mb) + for (;;) + { + switch (sp[*(unsigned char*)cp++]) + { + case S_SPACE: + continue; + case S_WIDE: + tp = cp - 1; + if (iswspace(mbchar(tp))) + { + cp = tp; + continue; + } + break; + } + break; + } + else + while (sp[*(unsigned char*)cp++]==S_SPACE); cp--; } - fp->fieldlen = (fp->fieldlist[n+1]-cp)-1; + fp->fieldlen = fp->fields[n].end - cp; return (unsigned char*)cp; } fp->fieldlen = 0; return (unsigned char*)""; } +static unsigned char* +_trace_getrec(Join_t* jp, int index, int discard) +{ + unsigned char* r; + + r = getrec(jp, index, discard); + return r; +} +#define getrec _trace_getrec + #if DEBUG_TRACE static unsigned char* u1,u2,u3; #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) @@ -341,42 +473,78 @@ outfield(Join_t* jp, int index, register int n, int last) register char* cpmax; register int size; register Sfio_t* iop = jp->outfile; + char* tp; if (n < fp->nfields) { - cp = fp->fieldlist[n]; - cpmax = fp->fieldlist[n+1]; + cp = fp->fields[n].beg; + cpmax = fp->fields[n].end + 1; } else cp = 0; - if ((n=jp->delim)<=0) + if ((n = jp->delim) == -1) { if (cp && fp->spaces) { + register unsigned char* sp = jp->state; + /*eliminate leading spaces */ - while (jp->state[*(unsigned char*)cp++]==S_SPACE); + if (jp->mb) + for (;;) + { + switch (sp[*(unsigned char*)cp++]) + { + case S_SPACE: + continue; + case S_WIDE: + tp = cp - 1; + if (iswspace(mbchar(tp))) + { + cp = tp; + continue; + } + break; + } + break; + } + else + while (sp[*(unsigned char*)cp++]==S_SPACE); cp--; } n = ' '; } + else if (jp->delimstr) + n = -1; if (last) n = '\n'; if (cp) - size = cpmax-cp; + size = cpmax - cp; else size = 0; - if (size<=1) + if (n == -1) + { + if (size<=1) + { + if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0) + return -1; + } + else if (sfwrite(iop, cp, size) < 0) + return -1; + if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0) + return -1; + } + else if (size <= 1) { if (!jp->nullfield) - sfputc(iop,n); - else if (sfputr(iop,jp->nullfield,n) < 0) + sfputc(iop, n); + else if (sfputr(iop, jp->nullfield, n) < 0) return -1; } else { last = cp[size-1]; cp[size-1] = n; - if (sfwrite(iop,cp,size) < 0) + if (sfwrite(iop, cp, size) < 0) return -1; cp[size-1] = last; } @@ -735,7 +903,18 @@ b_join(int argc, char** argv, void* context) continue; case 't': jp->state[' '] = jp->state['\t'] = 0; - n= *(unsigned char*)opt_info.arg; + if (jp->mb) + { + cp = opt_info.arg; + jp->delim = mbchar(cp); + if ((n = cp - opt_info.arg) > 1) + { + jp->delimlen = n; + jp->delimstr = opt_info.arg; + continue; + } + } + n = *(unsigned char*)opt_info.arg; jp->state[n] = S_DELIM; jp->delim = n; continue; @@ -802,7 +981,6 @@ b_join(int argc, char** argv, void* context) sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND); } - jp->state['\n'] = S_NL; jp->outfile = sfstdout; if (!jp->outlist) jp->nullfield = 0; |