summaryrefslogtreecommitdiff
path: root/usr/src/lib/libcmd/common/join.c
diff options
context:
space:
mode:
author Roger A. Faulkner <Roger.Faulkner@Oracle.COM> 2010-04-03 09:48:44 -0700
committer Roger A. Faulkner <Roger.Faulkner@Oracle.COM> 2010-04-03 09:48:44 -0700
commit 3e14f97f673e8a630f076077de35afdd43dc1587 (patch)
tree 9828b6b676f58d7b27dd28e00202d3f3461a5cd8 /usr/src/lib/libcmd/common/join.c
parent 4f60987df4dcaa54a88b596f861fbf4f3382c65e (diff)
download illumos-joyent-3e14f97f673e8a630f076077de35afdd43dc1587.tar.gz
6939349 RFE: Update ksh93 to ast-ksh.2010-03-09
6877392 ksh93 regresses 'uniq -c' performance
6887363 Korn shell 93 sometimes mishandles return value of its child process
6900314 (while true ; do true|true ; done) hang in ioctl() with SIGTTOU
6904557 wc no longer counts number of bytes correctly
6904575 cut -d with multibyte character no longer works
6904597 paste -d no longer works with multibyte characters
6904780 /usr/bin/cksum changed output in snv_128
6904870 uniq -s does not skip multibyte characters correctly
6904878 join -t no longer works with multibyte char separator
6907460 EXIT trap handlers are sometimes executed twice
6909579 libast getopt solaris compatibility broken
6920072 ksh93 tail -f, with unconditional .25s sleep and line parsing, about 37x slower than cat
6932124 mktemp in ksh93 is broken
Contributed by Olga Kryzhanovska <olga.kryzhanovska@gmail.com>
Diffstat (limited to 'usr/src/lib/libcmd/common/join.c')
-rw-r--r-- usr/src/lib/libcmd/common/join.c 280
1 files changed, 229 insertions, 51 deletions
diff --git a/usr/src/lib/libcmd/common/join.c b/usr/src/lib/libcmd/common/join.c
index 909a8e8a29..4ef3c40860 100644
--- a/usr/src/lib/libcmd/common/join.c
+++ b/usr/src/lib/libcmd/common/join.c
@@ -1,7 +1,7 @@
/***********************************************************************
* *
* This software is part of the ast package *
-* Copyright (c) 1992-2009 AT&T Intellectual Property *
+* Copyright (c) 1992-2010 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
@@ -28,7 +28,7 @@
*/
static const char usage[] =
-"[-?\n@(#)$Id: join (AT&T Research) 2009-08-01 $\n]"
+"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
USAGE_LICENSE
"[+NAME?join - relational database operator]"
"[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
@@ -93,6 +93,21 @@ USAGE_LICENSE
#include <cmd.h>
#include <sfdisc.h>
+#if _hdr_wchar && _hdr_wctype && _lib_iswctype
+
+#include <wchar.h>
+#include <wctype.h>
+
+#else
+
+#include <ctype.h>
+
+#ifndef iswspace
+#define iswspace(x) isspace(x)
+#endif
+
+#endif
+
#define C_FILE1 001
#define C_FILE2 002
#define C_COMMON 004
@@ -104,8 +119,15 @@ USAGE_LICENSE
#define S_DELIM 1
#define S_SPACE 2
#define S_NL 3
+#define S_WIDE 4
-typedef struct
+typedef struct Field_s /* extent of one field within the current record buffer */
+{
+ char* beg; /* where the field starts; getrec() stores the scan position here before skipping any leading blanks */
+ char* end; /* where the field's terminator begins; outfield() uses end + 1 as its copy limit */
+} Field_t;
+
+typedef struct File_s
{
Sfio_t* iop;
char* name;
@@ -118,10 +140,10 @@ typedef struct
int spaces;
int hit;
int discard;
- char** fieldlist;
+ Field_t* fields;
} File_t;
-typedef struct
+typedef struct Join_s
{
unsigned char state[1<<CHAR_BIT];
Sfio_t* outfile;
@@ -129,9 +151,12 @@ typedef struct
int outmode;
int ooutmode;
char* nullfield;
+ char* delimstr;
int delim;
+ int delimlen;
int buffered;
int ignorecase;
+ int mb;
char* same;
int samesize;
void* context;
@@ -147,10 +172,10 @@ done(register Join_t* jp)
sfclose(jp->file[1].iop);
if (jp->outlist)
free(jp->outlist);
- if (jp->file[0].fieldlist)
- free(jp->file[0].fieldlist);
- if (jp->file[1].fieldlist)
- free(jp->file[1].fieldlist);
+ if (jp->file[0].fields)
+ free(jp->file[0].fields);
+ if (jp->file[1].fields)
+ free(jp->file[1].fields);
if (jp->same)
free(jp->same);
free(jp);
@@ -160,14 +185,20 @@ static Join_t*
init(void)
{
register Join_t* jp;
+ register int i;
+ setlocale(LC_ALL, "");
if (jp = newof(0, Join_t, 1, 0))
{
+ if (jp->mb = mbwide())
+ for (i = 0x80; i <= 0xff; i++)
+ jp->state[i] = S_WIDE;
jp->state[' '] = jp->state['\t'] = S_SPACE;
+ jp->state['\n'] = S_NL;
jp->delim = -1;
jp->nullfield = 0;
- if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) ||
- !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0)))
+ if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
+ !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
{
done(jp);
return 0;
@@ -265,10 +296,11 @@ getrec(Join_t* jp, int index, int discard)
{
register unsigned char* sp = jp->state;
register File_t* fp = &jp->file[index];
- register char** ptr = fp->fieldlist;
- register char** ptrmax = ptr + fp->maxfields;
+ register Field_t* field = fp->fields;
+ register Field_t* fieldmax = field + fp->maxfields;
register char* cp;
- register int n = 0;
+ register int n;
+ char* tp;
if (sh_checksig(jp->context))
return 0;
@@ -283,48 +315,148 @@ getrec(Join_t* jp, int index, int discard)
}
fp->recptr = cp;
fp->reclen = sfvalue(fp->iop);
- if (jp->delim=='\n') /* handle new-line delimiter specially */
+ if (jp->delim == '\n') /* handle new-line delimiter specially */
{
- *ptr++ = cp;
+ field->beg = cp;
cp += fp->reclen;
+ field->end = cp - 1;
+ field++;
}
- else while (n!=S_NL) /* separate into fields */
- {
- if (ptr >= ptrmax)
- {
- n = 2*fp->maxfields;
- fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0);
- ptr = fp->fieldlist + fp->maxfields;
- fp->maxfields = n;
- ptrmax = fp->fieldlist+n;
- }
- *ptr++ = cp;
- if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE)
+ else
+ do /* separate into fields */
{
- fp->spaces = 1;
- while (sp[*(unsigned char*)cp++]==S_SPACE);
- cp--;
- }
- while ((n=sp[*(unsigned char*)cp++])==0);
- }
- *ptr = cp;
- fp->nfields = ptr - fp->fieldlist;
- if ((n=fp->field) < fp->nfields)
+ if (field >= fieldmax)
+ {
+ n = 2 * fp->maxfields;
+ fp->fields = newof(fp->fields, Field_t, n + 1, 0);
+ field = fp->fields + fp->maxfields;
+ fp->maxfields = n;
+ fieldmax = fp->fields + n;
+ }
+ field->beg = cp;
+ if (jp->delim == -1)
+ {
+ switch (sp[*(unsigned char*)cp])
+ {
+ case S_SPACE:
+ cp++;
+ break;
+ case S_WIDE:
+ tp = cp;
+ if (iswspace(mbchar(tp)))
+ {
+ cp = tp;
+ break;
+ }
+ /*FALLTHROUGH*/
+ default:
+ goto next;
+ }
+ fp->spaces = 1;
+ if (jp->mb)
+ for (;;)
+ {
+ switch (sp[*(unsigned char*)cp++])
+ {
+ case S_SPACE:
+ continue;
+ case S_WIDE:
+ tp = cp - 1;
+ if (iswspace(mbchar(tp)))
+ {
+ cp = tp;
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+ else
+ while (sp[*(unsigned char*)cp++]==S_SPACE);
+ cp--;
+ }
+ next:
+ if (jp->mb)
+ {
+ for (;;)
+ {
+ tp = cp;
+ switch (n = sp[*(unsigned char*)cp++])
+ {
+ case 0:
+ continue;
+ case S_WIDE:
+ cp--;
+ n = mbchar(cp);
+ if (n == jp->delim)
+ {
+ n = S_DELIM;
+ break;
+ }
+ if (jp->delim == -1 && iswspace(n))
+ {
+ n = S_SPACE;
+ break;
+ }
+ continue;
+ }
+ break;
+ }
+ field->end = tp;
+ }
+ else
+ {
+ while (!(n = sp[*(unsigned char*)cp++]));
+ field->end = cp - 1;
+ }
+ field++;
+ } while (n != S_NL);
+ fp->nfields = field - fp->fields;
+ if ((n = fp->field) < fp->nfields)
{
- cp = fp->fieldlist[n];
+ cp = fp->fields[n].beg;
/* eliminate leading spaces */
if (fp->spaces)
{
- while (sp[*(unsigned char*)cp++]==S_SPACE);
+ if (jp->mb)
+ for (;;)
+ {
+ switch (sp[*(unsigned char*)cp++])
+ {
+ case S_SPACE:
+ continue;
+ case S_WIDE:
+ tp = cp - 1;
+ if (iswspace(mbchar(tp)))
+ {
+ cp = tp;
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+ else
+ while (sp[*(unsigned char*)cp++]==S_SPACE);
cp--;
}
- fp->fieldlen = (fp->fieldlist[n+1]-cp)-1;
+ fp->fieldlen = fp->fields[n].end - cp;
return (unsigned char*)cp;
}
fp->fieldlen = 0;
return (unsigned char*)"";
}
+static unsigned char*
+_trace_getrec(Join_t* jp, int index, int discard)
+{
+ unsigned char* r;
+
+ r = getrec(jp, index, discard);
+ return r;
+}
+#define getrec _trace_getrec
+
#if DEBUG_TRACE
static unsigned char* u1,u2,u3;
#define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
@@ -341,42 +473,78 @@ outfield(Join_t* jp, int index, register int n, int last)
register char* cpmax;
register int size;
register Sfio_t* iop = jp->outfile;
+ char* tp;
if (n < fp->nfields)
{
- cp = fp->fieldlist[n];
- cpmax = fp->fieldlist[n+1];
+ cp = fp->fields[n].beg;
+ cpmax = fp->fields[n].end + 1;
}
else
cp = 0;
- if ((n=jp->delim)<=0)
+ if ((n = jp->delim) == -1)
{
if (cp && fp->spaces)
{
+ register unsigned char* sp = jp->state;
+
/*eliminate leading spaces */
- while (jp->state[*(unsigned char*)cp++]==S_SPACE);
+ if (jp->mb)
+ for (;;)
+ {
+ switch (sp[*(unsigned char*)cp++])
+ {
+ case S_SPACE:
+ continue;
+ case S_WIDE:
+ tp = cp - 1;
+ if (iswspace(mbchar(tp)))
+ {
+ cp = tp;
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+ else
+ while (sp[*(unsigned char*)cp++]==S_SPACE);
cp--;
}
n = ' ';
}
+ else if (jp->delimstr)
+ n = -1;
if (last)
n = '\n';
if (cp)
- size = cpmax-cp;
+ size = cpmax - cp;
else
size = 0;
- if (size<=1)
+ if (n == -1)
+ {
+ if (size<=1)
+ {
+ if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
+ return -1;
+ }
+ else if (sfwrite(iop, cp, size) < 0)
+ return -1;
+ if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
+ return -1;
+ }
+ else if (size <= 1)
{
if (!jp->nullfield)
- sfputc(iop,n);
- else if (sfputr(iop,jp->nullfield,n) < 0)
+ sfputc(iop, n);
+ else if (sfputr(iop, jp->nullfield, n) < 0)
return -1;
}
else
{
last = cp[size-1];
cp[size-1] = n;
- if (sfwrite(iop,cp,size) < 0)
+ if (sfwrite(iop, cp, size) < 0)
return -1;
cp[size-1] = last;
}
@@ -735,7 +903,18 @@ b_join(int argc, char** argv, void* context)
continue;
case 't':
jp->state[' '] = jp->state['\t'] = 0;
- n= *(unsigned char*)opt_info.arg;
+ if (jp->mb)
+ {
+ cp = opt_info.arg;
+ jp->delim = mbchar(cp);
+ if ((n = cp - opt_info.arg) > 1)
+ {
+ jp->delimlen = n;
+ jp->delimstr = opt_info.arg;
+ continue;
+ }
+ }
+ n = *(unsigned char*)opt_info.arg;
jp->state[n] = S_DELIM;
jp->delim = n;
continue;
@@ -802,7 +981,6 @@ b_join(int argc, char** argv, void* context)
sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
}
- jp->state['\n'] = S_NL;
jp->outfile = sfstdout;
if (!jp->outlist)
jp->nullfield = 0;