diff options
Diffstat (limited to 'usr/src/lib/libcmd/common/wclib.c')
-rw-r--r-- | usr/src/lib/libcmd/common/wclib.c | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/usr/src/lib/libcmd/common/wclib.c b/usr/src/lib/libcmd/common/wclib.c new file mode 100644 index 0000000000..c16e4efc87 --- /dev/null +++ b/usr/src/lib/libcmd/common/wclib.c @@ -0,0 +1,197 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1992-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * David Korn + * AT&T Bell Laboratories + * + * library interface for word count + */ + +#include <cmd.h> +#include <wc.h> +#include <ctype.h> + +#if _hdr_wchar && _hdr_wctype + +#include <wchar.h> +#include <wctype.h> + +#else + +#ifndef iswspace +#define iswspace(x) isspace(x) +#endif + +#endif + +#define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) +#define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) + +Wc_t *wc_init(int mode) +{ + register int n; + register int w; + Wc_t* wp; + + if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) + return(0); + wp->mode = mode; + w = mode & WC_WORDS; + for(n=(1<<CHAR_BIT);--n >=0;) + wp->space[n] = w ? !!isspace(n) : 0; + wp->space['\n'] = -1; + return(wp); +} + +/* + * compute the line, word, and character count for file <fd> + */ +int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) +{ + register signed char *space = wp->space; + register unsigned char *cp; + register Sfoff_t nchars; + register Sfoff_t nwords; + register Sfoff_t nlines; + register Sfoff_t eline; + register Sfoff_t longest; + register ssize_t c; + register unsigned char *endbuff; + register int lasttype = 1; + unsigned int lastchar; + unsigned char *buff; + wchar_t x; + + sfset(fd,SF_WRITE,1); + nlines = nwords = nchars = 0; + wp->longest = 0; + if (wp->mode & (WC_LONGEST|WC_MBYTE)) + { + longest = 0; + eline = -1; + cp = buff = endbuff = 0; + for (;;) + { + if (!mbok(cp, endbuff-cp)) + { + if (buff) + sfread(fd, buff, cp-buff); + if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) + break; + endbuff = (cp = buff) + sfvalue(fd); + } + nchars++; + x = mbchar(cp); + if (x == -1) + { + if (eline != nlines && !(wp->mode & WC_QUIET)) + { + error_info.file = (char*)file; + error_info.line = eline = nlines; + error(ERROR_SYSTEM|1, "invalid multibyte character"); + error_info.file = 0; + error_info.line = 0; + } + } + else if (x == '\n') + { + if ((nchars - longest) > wp->longest) + wp->longest = nchars - longest; + longest = nchars; + nlines++; + lasttype = 1; + } + else if (iswspace(x)) + lasttype = 1; + else if (lasttype) + { + lasttype = 0; + nwords++; + } + } + } + else + { + for (;;) + { + /* fill next buffer and check for end-of-file */ + if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) + break; + sfread(fd,(char*)(cp=buff),c); + nchars += c; + /* check to see whether first character terminates word */ + if(c==1) + { + if(endline(lasttype)) + nlines++; + if((c = space[*cp]) && !lasttype) + nwords++; + lasttype = c; + continue; + } + if(!lasttype && space[*cp]) + nwords++; + lastchar = cp[--c]; + cp[c] = '\n'; + endbuff = cp+c; + c = lasttype; + /* process each buffer */ + for (;;) + { + /* process spaces and new-lines */ + do if (endline(c)) + { + for (;;) + { + /* check for end of buffer */ + if (cp > endbuff) + goto eob; + nlines++; + if (*cp != '\n') + break; + cp++; + } + } while (c = space[*cp++]); + /* skip over word characters */ + while(!(c = space[*cp++])); + nwords++; + } + eob: + if((cp -= 2) >= buff) + c = space[*cp]; + else + c = lasttype; + lasttype = space[lastchar]; + /* see if was in word */ + if(!c && !lasttype) + nwords--; + } + if(endline(lasttype)) + nlines++; + else if(!lasttype) + nwords++; + } + wp->chars = nchars; + wp->words = nwords; + wp->lines = nlines; + return(0); +} |