summaryrefslogtreecommitdiff
path: root/usr/src/lib/libcmd/common/wclib.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libcmd/common/wclib.c')
-rw-r--r--usr/src/lib/libcmd/common/wclib.c197
1 files changed, 197 insertions, 0 deletions
diff --git a/usr/src/lib/libcmd/common/wclib.c b/usr/src/lib/libcmd/common/wclib.c
new file mode 100644
index 0000000000..c16e4efc87
--- /dev/null
+++ b/usr/src/lib/libcmd/common/wclib.c
@@ -0,0 +1,197 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1992-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * David Korn
+ * AT&T Bell Laboratories
+ *
+ * library interface for word count
+ */
+
+#include <cmd.h>
+#include <wc.h>
+#include <ctype.h>
+
+#if _hdr_wchar && _hdr_wctype
+
+#include <wchar.h>
+#include <wctype.h>
+
+#else
+
+#ifndef iswspace
+#define iswspace(x) isspace(x)
+#endif
+
+#endif
+
+#define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1))
+#define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1)
+
+Wc_t *wc_init(int mode)
+{
+ register int n;
+ register int w;
+ Wc_t* wp;
+
+ if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
+ return(0);
+ wp->mode = mode;
+ w = mode & WC_WORDS;
+ for(n=(1<<CHAR_BIT);--n >=0;)
+ wp->space[n] = w ? !!isspace(n) : 0;
+ wp->space['\n'] = -1;
+ return(wp);
+}
+
+/*
+ * compute the line, word, and character count for file <fd>
+ */
+int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
+{
+ register signed char *space = wp->space;
+ register unsigned char *cp;
+ register Sfoff_t nchars;
+ register Sfoff_t nwords;
+ register Sfoff_t nlines;
+ register Sfoff_t eline;
+ register Sfoff_t longest;
+ register ssize_t c;
+ register unsigned char *endbuff;
+ register int lasttype = 1;
+ unsigned int lastchar;
+ unsigned char *buff;
+ wchar_t x;
+
+ sfset(fd,SF_WRITE,1);
+ nlines = nwords = nchars = 0;
+ wp->longest = 0;
+ if (wp->mode & (WC_LONGEST|WC_MBYTE))
+ {
+ longest = 0;
+ eline = -1;
+ cp = buff = endbuff = 0;
+ for (;;)
+ {
+ if (!mbok(cp, endbuff-cp))
+ {
+ if (buff)
+ sfread(fd, buff, cp-buff);
+ if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR)))
+ break;
+ endbuff = (cp = buff) + sfvalue(fd);
+ }
+ nchars++;
+ x = mbchar(cp);
+ if (x == -1)
+ {
+ if (eline != nlines && !(wp->mode & WC_QUIET))
+ {
+ error_info.file = (char*)file;
+ error_info.line = eline = nlines;
+ error(ERROR_SYSTEM|1, "invalid multibyte character");
+ error_info.file = 0;
+ error_info.line = 0;
+ }
+ }
+ else if (x == '\n')
+ {
+ if ((nchars - longest) > wp->longest)
+ wp->longest = nchars - longest;
+ longest = nchars;
+ nlines++;
+ lasttype = 1;
+ }
+ else if (iswspace(x))
+ lasttype = 1;
+ else if (lasttype)
+ {
+ lasttype = 0;
+ nwords++;
+ }
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ /* fill next buffer and check for end-of-file */
+ if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0)
+ break;
+ sfread(fd,(char*)(cp=buff),c);
+ nchars += c;
+ /* check to see whether first character terminates word */
+ if(c==1)
+ {
+ if(endline(lasttype))
+ nlines++;
+ if((c = space[*cp]) && !lasttype)
+ nwords++;
+ lasttype = c;
+ continue;
+ }
+ if(!lasttype && space[*cp])
+ nwords++;
+ lastchar = cp[--c];
+ cp[c] = '\n';
+ endbuff = cp+c;
+ c = lasttype;
+ /* process each buffer */
+ for (;;)
+ {
+ /* process spaces and new-lines */
+ do if (endline(c))
+ {
+ for (;;)
+ {
+ /* check for end of buffer */
+ if (cp > endbuff)
+ goto eob;
+ nlines++;
+ if (*cp != '\n')
+ break;
+ cp++;
+ }
+ } while (c = space[*cp++]);
+ /* skip over word characters */
+ while(!(c = space[*cp++]));
+ nwords++;
+ }
+ eob:
+ if((cp -= 2) >= buff)
+ c = space[*cp];
+ else
+ c = lasttype;
+ lasttype = space[lastchar];
+ /* see if was in word */
+ if(!c && !lasttype)
+ nwords--;
+ }
+ if(endline(lasttype))
+ nlines++;
+ else if(!lasttype)
+ nwords++;
+ }
+ wp->chars = nchars;
+ wp->words = nwords;
+ wp->lines = nlines;
+ return(0);
+}