diff options
Diffstat (limited to 'usr/src/lib/libcmd/common/uniq.c')
-rw-r--r-- | usr/src/lib/libcmd/common/uniq.c | 302 |
1 files changed, 302 insertions, 0 deletions
diff --git a/usr/src/lib/libcmd/common/uniq.c b/usr/src/lib/libcmd/common/uniq.c new file mode 100644 index 0000000000..e6e0da4667 --- /dev/null +++ b/usr/src/lib/libcmd/common/uniq.c @@ -0,0 +1,302 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1992-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * uniq + * + * Written by David Korn + */ + +static const char usage[] = +"[-?\n@(#)$Id: uniq (AT&T Research) 2006-08-28 $\n]" +USAGE_LICENSE +"[+NAME?uniq - Report or filter out repeated lines in a file]" +"[+DESCRIPTION?\buniq\b reads an input, comparing adjacent lines, and " + "writing one copy of each input line on the output. The second " + "and succeeding copies of the repeated adjacent lines are not " + "written.]" +"[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes " + "to standard output. If no \ainfile\a is given, or if the \ainfile\a " + "is \b-\b, \buniq\b reads from standard input with the start of " + "the file is defined as the current offset.]" +"[c:count?Output the number of times each line occurred along with " + "the line.]" +"[d:repeated|duplicates?Output the first of each duplicate line.]" +"[D:all-repeated?Output all duplicate lines as a group with an empty " + "line delimiter specified by \adelimit\a:]:?[delimit:=none]" + "{" + "[n:none?Do not delimit duplicate groups.]" + "[p:prepend?Prepend an empty line before each group.]" + "[s:separate?Separate each group with an empty line.]" + "}" +"[f:skip-fields]#[fields?\afields\a is the number of fields to skip over " + "before checking for uniqueness. A field is the minimal string matching " + "the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b.]" +"[i:ignore-case?Ignore case in comparisons.]" +"[s:skip-chars]#[chars?\achars\a is the number of characters to skip over " + "before checking for uniqueness. If specified along with \b-f\b, " + "the first \achars\a after the first \afields\a are ignored. If " + "the \achars\a specifies more characters than are on the line, " + "an empty string will be used for comparison.]" +"[u:unique?Output unique lines.]" +"[w:check-chars]#[chars?\achars\a is the number of characters to compare " + "after skipping any specified fields and characters.]" +"\n" +"\n[infile [outfile]]\n" +"\n" +"[+EXIT STATUS?]{" + "[+0?The input file was successfully processed.]" + "[+>0?An error occurred.]" +"}" +"[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]" +; + +#include <cmd.h> + +#define C_FLAG 1 +#define D_FLAG 2 +#define U_FLAG 4 + +#define CWIDTH 4 +#define MAXCNT 9999 + +typedef int (*Compare_f)(const char*, const char*, size_t); + +static int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare) +{ + register int n, f, outsize=0; + register char *cp, *ep, *bufp, *outp; + char *orecp, *sbufp=0, *outbuff; + int reclen,oreclen= -1,count=0,cwidth=0,sep,next; + if(mode&C_FLAG) + cwidth = CWIDTH+1; + while(1) + { + if(bufp = sfgetr(fdin,'\n',0)) + n = sfvalue(fdin); + else if(bufp = sfgetr(fdin,'\n',SF_LASTR)) + { + n = sfvalue(fdin); + bufp = memcpy(fmtbuf(n + 1), bufp, n); + bufp[n++] = '\n'; + } + else + n = 0; + if(n) + { + cp = bufp; + ep = cp + n; + if(f=fields) + while(f-->0 && cp<ep) /* skip over fields */ + { + while(cp<ep && *cp==' ' || *cp=='\t') + cp++; + while(cp<ep && *cp!=' ' && *cp!='\t') + cp++; + } + if(chars) + cp += chars; + if((reclen = n - (cp-bufp)) <=0) + { + reclen = 1; + cp = bufp + sfvalue(fdin)-1; + } + else if(width >= 0 && width < reclen) + reclen = width; + } + else + reclen=-2; + if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen))) + { + count++; + if (!all) + continue; + next = count; + } + else + { + next = 0; + if(outsize>0) + { + if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count)) + { + if(outp!=sbufp) + sfwrite(fdout,outp,0); + } + else + { + if(cwidth) + { + outp[CWIDTH] = ' '; + if(count<MAXCNT) + { + sfsprintf(outp,cwidth,"%*d",CWIDTH,count+1); + outp[CWIDTH] = ' '; + } + else + { + outsize -= (CWIDTH+1); + if(outp!=sbufp) + { + if(!(sbufp=fmtbuf(outsize))) + return(1); + memcpy(sbufp,outp+CWIDTH+1,outsize); + sfwrite(fdout,outp,0); + outp = sbufp; + } + else + outp += CWIDTH+1; + sfprintf(fdout,"%4d ",count+1); + } + } + if(sfwrite(fdout,outp,outsize) != outsize) + return(1); + } + } + } + if(n==0) + break; + if(count = next) + { + if(sfwrite(fdout,outp,outsize) != outsize) + return(1); + if(*all >= 0) + *all = 1; + sep = 0; + } + else + sep = all && *all > 0; + /* save current record */ + if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0) + return(1); + outp = outbuff; + if(outsize < n+cwidth+sep) + { + /* no room in outp, clear lock and use side buffer */ + sfwrite(fdout,outp,0); + if(!(sbufp = outp=fmtbuf(outsize=n+cwidth+sep))) + return(1); + } + else + outsize = n+cwidth+sep; + memcpy(outp+cwidth+sep,bufp,n); + if(sep) + outp[cwidth] = '\n'; + oreclen = reclen; + orecp = outp+cwidth+sep + (cp-bufp); + } + return(0); +} + +int +b_uniq(int argc, char** argv, void* context) +{ + register int n, mode=0; + register char *cp; + int fields=0, chars=0, width=-1; + Sfio_t *fpin, *fpout; + int* all = 0; + int sep; + Compare_f compare = (Compare_f)memcmp; + + cmdinit(argc, argv, context, ERROR_CATALOG, 0); + while (n = optget(argv, usage)) switch (n) + { + case 'c': + mode |= C_FLAG; + break; + case 'd': + mode |= D_FLAG; + break; + case 'D': + mode |= D_FLAG; + switch ((int)opt_info.num) + { + case 'p': + sep = 1; + break; + case 's': + sep = 0; + break; + default: + sep = -1; + break; + } + all = &sep; + break; + case 'i': + compare = (Compare_f)strncasecmp; + break; + case 'u': + mode |= U_FLAG; + break; + case 'f': + if(*opt_info.option=='-') + fields = opt_info.num; + else + chars = opt_info.num; + break; + case 's': + chars = opt_info.num; + break; + case 'w': + width = opt_info.num; + break; + case ':': + error(2, "%s", opt_info.arg); + break; + case '?': + error(ERROR_usage(2), "%s", opt_info.arg); + break; + } + argv += opt_info.index; + if(all && (mode&C_FLAG)) + error(2, "-c and -D are mutually exclusive"); + if(error_info.errors) + error(ERROR_usage(2), "%s", optusage(NiL)); + if((cp = *argv) && (argv++,!streq(cp,"-"))) + { + if(!(fpin = sfopen(NiL,cp,"r"))) + error(ERROR_system(1),"%s: cannot open",cp); + } + else + fpin = sfstdin; + if(cp = *argv) + { + argv++; + if(!(fpout = sfopen(NiL,cp,"w"))) + error(ERROR_system(1),"%s: cannot create",cp); + } + else + fpout = sfstdout; + if(*argv) + { + error(2, "too many arguments"); + error(ERROR_usage(2), "%s", optusage(NiL)); + } + error_info.errors = uniq(fpin,fpout,fields,chars,width,mode,all,compare); + if(fpin!=sfstdin) + sfclose(fpin); + if(fpout!=sfstdout) + sfclose(fpout); + return(error_info.errors); +} + |