diff options
| author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
|---|---|---|
| committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
| commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
| tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/cmd/csplit | |
| download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz | |
OpenSolaris Launch
Diffstat (limited to 'usr/src/cmd/csplit')
| -rw-r--r-- | usr/src/cmd/csplit/Makefile | 47 | ||||
| -rw-r--r-- | usr/src/cmd/csplit/csplit.c | 597 | ||||
| -rw-r--r-- | usr/src/cmd/csplit/csplit.xcl | 35 |
3 files changed, 679 insertions, 0 deletions
diff --git a/usr/src/cmd/csplit/Makefile b/usr/src/cmd/csplit/Makefile new file mode 100644 index 0000000000..d5f55c341e --- /dev/null +++ b/usr/src/cmd/csplit/Makefile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1989,1996 by Sun Microsystems, Inc. +# All rights reserved. +# + +PROG= csplit + +include ../Makefile.cmd + +LDLIBS += -lgen +XGETFLAGS += -a -x csplit.xcl +CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE=1 + +.KEEP_STATE: + +all: $(PROG) + +install: all $(ROOTPROG) + +clean: + +lint: lint_PROG + +include ../Makefile.targ diff --git a/usr/src/cmd/csplit/csplit.c b/usr/src/cmd/csplit/csplit.c new file mode 100644 index 0000000000..4f1055eda2 --- /dev/null +++ b/usr/src/cmd/csplit/csplit.c @@ -0,0 +1,597 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * csplit - Context or line file splitter + * Compile: cc -O -s -o csplit csplit.c + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <regexpr.h> +#include <signal.h> +#include <locale.h> +#include <libintl.h> + +#define LAST 0LL +#define ERR -1 +#define FALSE 0 +#define TRUE 1 +#define EXPMODE 2 +#define LINMODE 3 +#define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */ + + /* Globals */ + +char linbuf[LINSIZ]; /* Input line buffer */ +char *expbuf; +char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */ +char file[8192] = "xx"; /* File name buffer */ +char *targ; /* Arg ptr for error messages */ +char *sptr; +FILE *infile, *outfile; /* I/O file streams */ +int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */ +int errflg; +int fiwidth = 2; /* file index width (output file names) */ +extern int optind; +extern char *optarg; +offset_t offset; /* Regular expression offset value */ +offset_t curline; /* Current line in input file */ + +/* + * These defines are needed for regexp handling(see regexp(7)) + */ +#define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ); + +static int asc_to_ll(char *, long long *); +static void closefile(void); +static void fatal(char *, char *); +static offset_t findline(char *, offset_t); +static void flush(void); +static FILE *getfile(void); +static char *getline(int); +static void line_arg(char *); +static void num_arg(char *, int); +static void re_arg(char *); +static void sig(int); +static void to_line(offset_t); +static void usage(void); + +int +main(int argc, char **argv) +{ + int ch, mode; + char *ptr; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ +#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ +#endif + (void) textdomain(TEXT_DOMAIN); + + while ((ch = getopt(argc, argv, "skf:n:")) != EOF) { + switch (ch) { + case 'f': + (void) strcpy(file, optarg); + if ((ptr = strrchr(optarg, '/')) == NULL) + ptr = optarg; + else + ptr++; + + break; + case 'n': /* POSIX.2 */ + for (ptr = optarg; *ptr != NULL; ptr++) + if (!isdigit((int)*ptr)) + fatal("-n num\n", NULL); + fiwidth = atoi(optarg); + break; + case 'k': + keep++; + break; + case 's': + silent++; + break; + case '?': + errflg++; + } + } + + argv = &argv[optind]; + argc -= optind; + if (argc <= 1 || errflg) + usage(); + + if (strcmp(*argv, "-") == 0) { + infile = tmpfile(); + + while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) { + if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0) + if (errno == ENOSPC) { + (void) fprintf(stderr, "csplit: "); + (void) fprintf(stderr, gettext( + "No space left on device\n")); + exit(1); + } else { + (void) fprintf(stderr, "csplit: "); + (void) fprintf(stderr, gettext( + "Bad write to temporary " + "file\n")); + exit(1); + } + + /* clear the buffer to get correct size when writing buffer */ + + (void) memset(tmpbuf, '\0', sizeof (tmpbuf)); + } + rewind(infile); + } else if ((infile = fopen(*argv, "r")) == NULL) + fatal("Cannot open %s\n", *argv); + ++argv; + curline = (offset_t)1; + (void) signal(SIGINT, sig); + + /* + * The following for loop handles the different argument types. + * A switch is performed on the first character of the argument + * and each case calls the appropriate argument handling routine. + */ + + for (; *argv; ++argv) { + targ = *argv; + switch (**argv) { + case '/': + mode = EXPMODE; + create = TRUE; + re_arg(*argv); + break; + case '%': + mode = EXPMODE; + create = FALSE; + re_arg(*argv); + break; + case '{': + num_arg(*argv, mode); + mode = FALSE; + break; + default: + mode = LINMODE; + create = TRUE; + line_arg(*argv); + break; + } + } + create = TRUE; + to_line(LAST); + return (0); +} + +/* + * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc) + * It returns ERR if an illegal character. The reason that asc_to_ll + * does not return an answer(long long) is that any value for the long + * long is legal, and this version of asc_to_ll detects error strings. + */ + +static int +asc_to_ll(char *str, long long *plc) +{ + int f; + *plc = 0; + f = 0; + for (; ; str++) { + switch (*str) { + case ' ': + case '\t': + continue; + case '-': + f++; + /* FALLTHROUGH */ + case '+': + str++; + } + break; + } + for (; *str != NULL; str++) + if (*str >= '0' && *str <= '9') + *plc = *plc * 10 + *str - '0'; + else + return (ERR); + if (f) + *plc = -(*plc); + return (TRUE); /* not error */ +} + +/* + * Closefile prints the byte count of the file created,(via fseeko + * and ftello), if the create flag is on and the silent flag is not on. + * If the create flag is on closefile then closes the file(fclose). + */ + +static void +closefile() +{ + if (!silent && create) { + (void) fseeko(outfile, (offset_t)0, SEEK_END); + (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile)); + } + if (create) + (void) fclose(outfile); +} + +/* + * Fatal handles error messages and cleanup. + * Because "arg" can be the global file, and the cleanup processing + * uses the global file, the error message is printed first. If the + * "keep" flag is not set, fatal unlinks all created files. If the + * "keep" flag is set, fatal closes the current file(if there is one). + * Fatal exits with a value of 1. + */ + +static void +fatal(char *string, char *arg) +{ + char *fls; + int num; + + (void) fprintf(stderr, "csplit: "); + + /* gettext dynamically replaces string */ + + (void) fprintf(stderr, gettext(string), arg); + if (!keep) { + if (outfile) { + (void) fclose(outfile); + for (fls = file; *fls != '\0'; fls++) + continue; + fls -= fiwidth; + for (num = atoi(fls); num >= 0; num--) { + (void) sprintf(fls, "%.*d", fiwidth, num); + (void) unlink(file); + } + } + } else + if (outfile) + closefile(); + exit(1); +} + +/* + * Findline returns the line number referenced by the current argument. + * Its arguments are a pointer to the compiled regular expression(expr), + * and an offset(oset). The variable lncnt is used to count the number + * of lines searched. First the current stream location is saved via + * ftello(), and getline is called so that R.E. searching starts at the + * line after the previously referenced line. The while loop checks + * that there are more lines(error if none), bumps the line count, and + * checks for the R.E. on each line. If the R.E. matches on one of the + * lines the old stream location is restored, and the line number + * referenced by the R.E. and the offset is returned. + */ + +static offset_t +findline(char *expr, offset_t oset) +{ + static int benhere = 0; + offset_t lncnt = 0, saveloc; + + saveloc = ftello(infile); + if (curline != (offset_t)1 || benhere) /* If first line, first time, */ + (void) getline(FALSE); /* then don't skip */ + else + lncnt--; + benhere = 1; + while (getline(FALSE) != NULL) { + lncnt++; + if ((sptr = strrchr(linbuf, '\n')) != NULL) + *sptr = '\0'; + if (step(linbuf, expr)) { + (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); + return (curline+lncnt+oset); + } + } + (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); + return (curline+lncnt+oset+2); +} + +/* + * Flush uses fputs to put lines on the output file stream(outfile) + * Since fputs does its own buffering, flush doesn't need to. + * Flush does nothing if the create flag is not set. + */ + +static void +flush() +{ + if (create) + (void) fputs(linbuf, outfile); +} + +/* + * Getfile does nothing if the create flag is not set. If the create + * flag is set, getfile positions the file pointer(fptr) at the end of + * the file name prefix on the first call(fptr=0). The file counter is + * stored in the file name and incremented. If the subsequent fopen + * fails, the file name is copied to tfile for the error message, the + * previous file name is restored for cleanup, and fatal is called. If + * the fopen succeeds, the stream(opfil) is returned. + */ + +FILE * +getfile() +{ + static char *fptr; + static int ctr; + FILE *opfil; + char tfile[15]; + char *delim; + char savedelim; + + if (create) { + if (fptr == 0) + for (fptr = file; *fptr != NULL; fptr++); + (void) sprintf(fptr, "%.*d", fiwidth, ctr++); + + /* check for suffix length overflow */ + if (strlen(fptr) > fiwidth) { + fatal("Suffix longer than %ld chars; increase -n\n", + (char *)fiwidth); + } + + /* check for filename length overflow */ + + delim = strrchr(file, '/'); + if (delim == (char *)NULL) { + if (strlen(file) > pathconf(".", _PC_NAME_MAX)) { + fatal("Name too long: %s\n", file); + } + } else { + /* truncate file at pathname delim to do pathconf */ + savedelim = *delim; + *delim = '\0'; + /* + * file: pppppppp\0fffff\0 + * ..... ^ file + * ............. ^ delim + */ + if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) { + fatal("Name too long: %s\n", delim + 1); + } + *delim = savedelim; + } + + if ((opfil = fopen(file, "w")) == NULL) { + (void) strcpy(tfile, file); + (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2)); + fatal("Cannot create %s\n", tfile); + } + return (opfil); + } + return (NULL); +} + +/* + * Getline gets a line via fgets from the input stream "infile". + * The line is put into linbuf and may not be larger than LINSIZ. + * If getline is called with a non-zero value, the current line + * is bumped, otherwise it is not(for R.E. searching). + */ + +static char * +getline(int bumpcur) +{ + char *ret; + if (bumpcur) + curline++; + ret = fgets(linbuf, LINSIZ, infile); + return (ret); +} + +/* + * Line_arg handles line number arguments. + * line_arg takes as its argument a pointer to a character string + * (assumed to be a line number). If that character string can be + * converted to a number(long long), to_line is called with that number, + * otherwise error. + */ + +static void +line_arg(char *line) +{ + long long to; + + if (asc_to_ll(line, &to) == ERR) + fatal("%s: bad line number\n", line); + to_line(to); +} + +/* + * Num_arg handles repeat arguments. + * Num_arg copies the numeric argument to "rep" (error if number is + * larger than 20 characters or } is left off). Num_arg then converts + * the number and checks for validity. Next num_arg checks the mode + * of the previous argument, and applys the argument the correct number + * of times. If the mode is not set properly its an error. + */ + +static void +num_arg(char *arg, int md) +{ + offset_t repeat, toline; + char rep[21]; + char *ptr; + int len; + + ptr = rep; + for (++arg; *arg != '}'; arg += len) { + if (*arg == NULL) + fatal("%s: missing '}'\n", targ); + if ((len = mblen(arg, MB_LEN_MAX)) <= 0) + len = 1; + if ((ptr + len) >= &rep[20]) + fatal("%s: Repeat count too large\n", targ); + (void) memcpy(ptr, arg, len); + ptr += len; + } + *ptr = NULL; + if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L) + fatal("Illegal repeat count: %s\n", targ); + if (md == LINMODE) { + toline = offset = curline; + for (; repeat > 0LL; repeat--) { + toline += offset; + to_line(toline); + } + } else if (md == EXPMODE) + for (; repeat > 0LL; repeat--) + to_line(findline(expbuf, offset)); + else + fatal("No operation for %s\n", targ); +} + +/* + * Re_arg handles regular expression arguments. + * Re_arg takes a csplit regular expression argument. It checks for + * delimiter balance, computes any offset, and compiles the regular + * expression. Findline is called with the compiled expression and + * offset, and returns the corresponding line number, which is used + * as input to the to_line function. + */ + +static void +re_arg(char *string) +{ + char *ptr; + char ch; + int len; + + ch = *string; + ptr = string; + ptr++; + while (*ptr != ch) { + if (*ptr == '\\') + ++ptr; + + if (*ptr == NULL) + fatal("%s: missing delimiter\n", targ); + + if ((len = mblen(ptr, MB_LEN_MAX)) <= 0) + len = 1; + ptr += len; + } + + /* + * The line below was added because compile no longer supports + * the fourth argument being passed. The fourth argument used + * to be '/' or '%'. + */ + + *ptr = NULL; + if (asc_to_ll(++ptr, &offset) == ERR) + fatal("%s: illegal offset\n", string); + + /* + * The line below was added because INIT which did this for us + * was removed from compile in regexp.h + */ + + string++; + expbuf = compile(string, (char *)0, (char *)0); + if (regerrno) + PERROR(regerrno); + to_line(findline(expbuf, offset)); +} + +/* + * Sig handles breaks. When a break occurs the signal is reset, + * and fatal is called to clean up and print the argument which + * was being processed at the time the interrupt occured. + */ + +/* ARGSUSED */ +static void +sig(int s) +{ + (void) signal(SIGINT, sig); + fatal("Interrupt - program aborted at arg '%s'\n", targ); +} + +/* + * To_line creates split files. + * To_line gets as its argument the line which the current argument + * referenced. To_line calls getfile for a new output stream, which + * does nothing if create is False. If to_line's argument is not LAST + * it checks that the current line is not greater than its argument. + * While the current line is less than the desired line to_line gets + * lines and flushes(error if EOF is reached). + * If to_line's argument is LAST, it checks for more lines, and gets + * and flushes lines till the end of file. + * Finally, to_line calls closefile to close the output stream. + */ + +static void +to_line(offset_t ln) +{ + outfile = getfile(); + if (ln != LAST) { + if (curline > ln) + fatal("%s - out of range\n", targ); + while (curline < ln) { + if (getline(TRUE) == NULL) + fatal("%s - out of range\n", targ); + flush(); + } + } else /* last file */ + if (getline(TRUE) != NULL) { + flush(); + for (;;) { + if (getline(TRUE) == NULL) + break; + flush(); + } + } else + fatal("%s - out of range\n", targ); + closefile(); +} + +static void +usage() +{ + (void) fprintf(stderr, gettext( + "usage: csplit [-ks] [-f prefix] [-n number] " + "file arg1 ...argn\n")); + exit(1); +} diff --git a/usr/src/cmd/csplit/csplit.xcl b/usr/src/cmd/csplit/csplit.xcl new file mode 100644 index 0000000000..d7e3292a2c --- /dev/null +++ b/usr/src/cmd/csplit/csplit.xcl @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +msgid "" +msgid "%.02d" +msgid "%ld\n" +msgid "-" +msgid "csplit: " +msgid "r" +msgid "skf:" +msgid "w" +msgid "xx" +msgid "%lld\n" +msgid "skf:n:" +msgid "%.*d" +msgid "-n num\n" + |
