diff options
author | chin <none@none> | 2007-08-17 12:01:52 -0700 |
---|---|---|
committer | chin <none@none> | 2007-08-17 12:01:52 -0700 |
commit | da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968 (patch) | |
tree | 5280d3b78e289fe9551371ab6e7f15ef9944ea14 /usr/src/lib/libcmd/common/cut.c | |
parent | 073dbf9103ef2a2b05d8a16e2d26db04e0374b0e (diff) | |
download | illumos-joyent-da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968.tar.gz |
6437624 RFE: Add ksh93 (as /usr/bin/ksh93) and libshell.so to OS/Net
6505835 AST tools and library (libpp) required for creating l10n messages for ksh93
PSARC/2006/550 Korn Shell 93 Integration
PSARC/2006/587 /etc/ksh.kshrc for ksh93
PSARC/2007/035 ksh93 Amendments
Contributed by Roland Mainz <roland.mainz@nrubsig.org>
--HG--
rename : usr/src/lib/libcmd/common/mapfile-vers => deleted_files/usr/src/lib/libcmd/common/mapfile-vers
rename : usr/src/lib/libcmd/common/placeholder.c => deleted_files/usr/src/lib/libcmd/common/placeholder.c
Diffstat (limited to 'usr/src/lib/libcmd/common/cut.c')
-rw-r--r-- | usr/src/lib/libcmd/common/cut.c | 512 |
1 files changed, 512 insertions, 0 deletions
diff --git a/usr/src/lib/libcmd/common/cut.c b/usr/src/lib/libcmd/common/cut.c new file mode 100644 index 0000000000..732d1784da --- /dev/null +++ b/usr/src/lib/libcmd/common/cut.c @@ -0,0 +1,512 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1992-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * David Korn + * AT&T Bell Laboratories + * + * cut [-sN] [-f flist] [-c clist] [-d delim] [-D delim] [-r reclen] [file] ... + * + * cut fields or columns from fields from a file + */ + +static const char usage[] = +"[-?\n@(#)$Id: cut (AT&T Research) 2007-01-23 $\n]" +USAGE_LICENSE +"[+NAME?cut - cut out selected columns or fields of each line of a file]" +"[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields " + "from one or more files, contatenating them on standard output.]" +"[+?The option argument \alist\a is a comma-separated or blank-separated " + "list of positive numbers and ranges. Ranges can be of three " + "forms. The first is two positive integers separated by a hyphen " + "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to " + "\ahigh\a. The second is a positive number preceded by a hyphen " + "(\b-\b\ahigh\a), which represents all fields from field \b1\b to " + "\ahigh\a. The last is a positive number followed by a hyphen " + "(\alow\a\b-\b), which represents all fields from \alow\a to the " + "last field, inclusive. Elements in the \alist\a can be repeated, " + "can overlap, and can appear in any order. The order of the " + "output is that of the input.]" +"[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]" +"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b " + "cuts from standard input. The start of the file is defined " + "as the current offset.]" +"[b:bytes]:[list?\bcut\b based on a list of bytes.]" +"[c:characters]:[list?\bcut\b based on a list of characters.]" +"[d:delimiter]:[delim?The field character for the \b-f\b option is set " + "to \adelim\a. The default is the \btab\b character.]" +"[f:fields]:[list?\bcut\b based on fields separated by the delimiter " + "character specified with the \b-d\b optiion.]" +"[n:nosplit?Do not split characters. Currently ignored.]" +"[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length " + "records of length \areclen\a when used with the \b-b\b or \b-c\b " + "option.]" +"[s:suppress|only-delimited?Suppress lines with no delimiter characters, " + "when used with the \b-f\b option. By default, lines with no " + "delimiters will be passsed in untouched.]" +"[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for " + "the \b-f\b option is set to \aldelim\a. The default is the " + "\bnewline\b character.]" +"[N:nonewline?Do not output new-lines at end of each record when used " + "with the \b-b\b or \b-c\b option.]" +"\n" +"\n[file ...]\n" +"\n" +"[+EXIT STATUS?]{" + "[+0?All files processed successfully.]" + "[+>0?One or more files failed to open or could not be read.]" +"}" +"[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]" +; + +#include <cmd.h> +#include <ctype.h> + +typedef struct Last_s +{ + int seqno; + int seq; + int wdelim; + int ldelim; +} Last_t; + +typedef struct Cut_s +{ + int cflag; + int sflag; + int nlflag; + int wdelim; + int ldelim; + int seqno; + int reclen; + signed char space[UCHAR_MAX]; + Last_t last; + int list[2]; /* NOTE: must be last member */ +} Cut_t; + +#define HUGE (1<<14) +#define BLOCK 8*1024 +#define C_BYTES 1 +#define C_CHARS 2 +#define C_FIELDS 4 +#define C_SUPRESS 8 +#define C_NOCHOP 16 +#define C_NONEWLINE 32 + +/* + * compare the first of an array of integers + */ + +static int mycomp(register const void *a,register const void *b) +{ + return(*((int*)a) - *((int*)b)); +} + +static Cut_t *cutinit(int mode,char *str,int wdelim,int ldelim,size_t reclen) +{ + register int *lp, c, n=0; + register int range = 0; + register char *cp = str; + Cut_t *cuthdr; + if (!(cuthdr = (Cut_t*)stakalloc(sizeof(Cut_t)+strlen(cp)*sizeof(int)))) + error(ERROR_exit(1), "out of space"); + memset(cuthdr->space, 0, sizeof(cuthdr->space)); + cuthdr->last.seqno = 0; + cuthdr->last.seq = 0; + cuthdr->last.wdelim = 0; + cuthdr->last.ldelim = '\n'; + cuthdr->cflag = ((mode&C_CHARS)!=0 && mbwide()); + cuthdr->sflag = ((mode&C_SUPRESS)!=0); + cuthdr->nlflag = ((mode&C_NONEWLINE)!=0); + cuthdr->wdelim = wdelim; + cuthdr->ldelim = ldelim; + cuthdr->reclen = reclen; + cuthdr->seqno = ++cuthdr->last.seqno; + lp = cuthdr->list; + while(1) switch(c= *cp++) + { + case ' ': + case '\t': + while(*cp==' ' || *cp=='\t') + cp++; + case 0: + case ',': + if(range) + { + --range; + if((n = (n==0?HUGE:n-range)) < 0) + error(ERROR_exit(1),"invalid range for c/f option"); + *lp++ = range; + *lp++ = n; + } + else + { + *lp++ = --n; + *lp++ = 1; + } + if(c==0) + { + register int *dp; + *lp = HUGE; + n = 1 + (lp-cuthdr->list)/2; + qsort(lp=cuthdr->list,n,2*sizeof(*lp),mycomp); + /* eliminate overlapping regions */ + for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2) + { + if(lp[0] <= range) + { + if(lp[1]==HUGE) + { + dp[-1] = HUGE; + break; + } + if((c = lp[0]+lp[1]-range)>0) + { + range += c; + dp[-1] += c; + } + } + else + { + range = *dp++ = lp[0]; + if(lp[1]==HUGE) + { + *dp++ = HUGE; + break; + } + range += (*dp++ = lp[1]); + } + } + *dp = HUGE; + lp = cuthdr->list; + /* convert ranges into gaps */ + for(n=0; *lp!=HUGE; lp+=2) + { + c = *lp; + *lp -= n; + n = c+lp[1]; + } + return(cuthdr); + } + n = range = 0; + break; + + case '-': + if(range) + error(ERROR_exit(1),"bad list for c/f option"); + range = n?n:1; + n = 0; + break; + + default: + if(!isdigit(c)) + error(ERROR_exit(1),"bad list for c/f option"); + n = 10*n + (c-'0'); + } + /* NOTREACHED */ +} + +/* + * advance <cp> by <n> multi-byte characters + */ +static int advance(const char *str, register int n, register int inlen) +{ + register int size, len=inlen; + register const char *cp=str; + while(len>0 && n-->0) + { + size = mblen(cp, len); + if(size<0) + size = 1; + cp += size; + len -= size; + + } + if(n>0) + return(inlen+1); + return(cp-str); +} + +/* + * cut each line of file <fdin> and put results to <fdout> using list <list> + */ + +static int cutcols(Cut_t *cuthdr,Sfio_t *fdin,Sfio_t *fdout) +{ + register int c, ncol=0,len; + register const int *lp = cuthdr->list; + register char *inp; + register int skip; /* non-zero for don't copy */ + while(1) + { + if(len = cuthdr->reclen) + inp = sfreserve(fdin, len, -1); + else + inp = sfgetr(fdin, '\n', 0); + if(!inp && !(inp = sfgetr(fdin, 0, SF_LASTR))) + break; + len = sfvalue(fdin); + if((ncol = skip = *(lp = cuthdr->list)) == 0) + ncol = *++lp; + while(1) + { + if((c=(cuthdr->cflag?advance(inp,ncol,len):ncol)) > len) + c = len; + else if(c==len && !skip) + ncol++; + ncol -= c; + if(!skip && sfwrite(fdout,(char*)inp,c)<0) + return(-1); + inp += c; + if(ncol) + break; + len -= c; + ncol = *++lp; + skip = !skip; + } + if(!cuthdr->nlflag && (skip || cuthdr->reclen)) + sfputc(fdout,cuthdr->ldelim); + } + return(c); +} + +/* + * cut each line of file <fdin> and put results to <fdout> using list <list> + * stream <fdin> must be line buffered + */ + +#define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) + +static int cutfields(Cut_t *cuthdr,Sfio_t *fdin,Sfio_t *fdout) +{ + register unsigned char *cp; + register int c, nfields; + register const int *lp = cuthdr->list; + register unsigned char *copy; + register int nodelim, empty, inword=0; + register unsigned char *endbuff; + unsigned char *inbuff, *first; + int lastchar; + Sfio_t *fdtmp = 0; + long offset = 0; + if(cuthdr->seqno != cuthdr->last.seq) + { + cuthdr->space[cuthdr->last.ldelim] = 0; + cuthdr->space[cuthdr->last.wdelim] = 0; + cuthdr->space[cuthdr->last.wdelim=cuthdr->wdelim] = 1; + cuthdr->space[cuthdr->last.ldelim=cuthdr->ldelim] = -1; + cuthdr->last.seq = cuthdr->seqno; + } + /* process each buffer */ + while ((inbuff = (unsigned char*)sfreserve(fdin, SF_UNBOUND, 0)) && (c = sfvalue(fdin)) > 0) + { + cp = inbuff; + endbuff = cp + --c; + if((lastchar = cp[c]) != cuthdr->ldelim) + *endbuff = cuthdr->ldelim; + /* process each line in the buffer */ + while(cp <= endbuff) + { + first = cp; + if(!inword) + { + nodelim = empty = 1; + copy = cp; + if(nfields = *(lp = cuthdr->list)) + copy = 0; + else + nfields = *++lp; + } + else if(copy) + copy = cp; + inword = 0; + while(!inword) + { + /* skip over non-delimiter characters */ + while(!(c=cuthdr->space[*cp++])); + /* check for end-of-line */ + if(endline(c)) + { + if(cp<=endbuff) + break; + if((c=cuthdr->space[lastchar]),endline(c)) + break; + /* restore cuthdr->last. character */ + if(lastchar != cuthdr->ldelim) + *endbuff = lastchar; + inword++; + if(!c) + break; + } + nodelim = 0; + if(--nfields >0) + continue; + nfields = *++lp; + if(copy) + { + empty = 0; + if((c=(cp-1)-copy)>0 && sfwrite(fdout,(char*)copy,c)< 0) + goto failed; + copy = 0; + } + else + /* set to delimiter unless the first field */ + copy = cp -!empty; + } + if(!inword) + { + if(!copy) + { + if(nodelim) + { + if(!cuthdr->sflag) + { + if(offset) + { + sfseek(fdtmp,(Sfoff_t)0,SEEK_SET); + sfmove(fdtmp,fdout,offset,-1); + } + copy = first; + } + } + else + sfputc(fdout,'\n'); + } + if(offset) + sfseek(fdtmp,offset=0,SEEK_SET); + } + if(copy && (c=cp-copy)>0 && (!nodelim || !cuthdr->sflag) && sfwrite(fdout,(char*)copy,c)< 0) + goto failed; + } + /* see whether to save in tmp file */ + if(nodelim && inword && !cuthdr->sflag && (c=cp-first)>0) + { + /* copy line to tmpfile in case no fields */ + if(!fdtmp) + fdtmp = sftmp(BLOCK); + sfwrite(fdtmp,(char*)first,c); + offset +=c; + } + } +failed: + if(fdtmp) + sfclose(fdtmp); + return(0); +} + +int +b_cut(int argc,char *argv[], void* context) +{ + register char *cp = 0; + register Sfio_t *fp; + int n; + Cut_t *cuthdr; + int mode = 0; + int wdelim = '\t'; + int ldelim = '\n'; + size_t reclen = 0; + + cmdinit(argc, argv, context, ERROR_CATALOG, 0); + while (n = optget(argv, usage)) switch (n) + { + case 'b': + case 'c': + if(mode&C_FIELDS) + { + error(2, "f option already specified"); + break; + } + cp = opt_info.arg; + if(n=='b') + mode |= C_BYTES; + else + mode |= C_CHARS; + break; + case 'D': + ldelim = *(unsigned char*)opt_info.arg; + break; + case 'd': + wdelim = *(unsigned char*)opt_info.arg; + break; + case 'f': + if(mode&(C_CHARS|C_BYTES)) + { + error(2, "c option already specified"); + break; + } + cp = opt_info.arg; + mode |= C_FIELDS; + break; + case 'n': + mode |= C_NOCHOP; + break; + case 'N': + mode |= C_NONEWLINE; + break; + case 'R': + case 'r': + if(opt_info.num>0) + reclen = opt_info.num; + break; + case 's': + mode |= C_SUPRESS; + break; + case ':': + error(2, "%s", opt_info.arg); + break; + case '?': + error(ERROR_usage(2), "%s", opt_info.arg); + break; + } + argv += opt_info.index; + if (error_info.errors) + error(ERROR_usage(2), "%s",optusage(NiL)); + if(!cp) + { + error(2, "b, c or f option must be specified"); + error(ERROR_usage(2), "%s", optusage(NiL)); + } + if(!*cp) + error(3, "non-empty b, c or f option must be specified"); + if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS) + error(3, "s option requires f option"); + cuthdr = cutinit(mode,cp,wdelim,ldelim,reclen); + if(cp = *argv) + argv++; + do + { + if(!cp || streq(cp,"-")) + fp = sfstdin; + else if(!(fp = sfopen(NiL,cp,"r"))) + { + error(ERROR_system(0),"%s: cannot open",cp); + continue; + } + if(mode&C_FIELDS) + cutfields(cuthdr,fp,sfstdout); + else + cutcols(cuthdr,fp,sfstdout); + if(fp!=sfstdin) + sfclose(fp); + } + while(cp= *argv++); + return(error_info.errors?1:0); +} |