diff options
author | Garrett D'Amore <garrett@nexenta.com> | 2010-08-18 14:41:42 -0700 |
---|---|---|
committer | Garrett D'Amore <garrett@nexenta.com> | 2010-08-18 14:41:42 -0700 |
commit | 163bd69b3c164dda2a59c7f08ca788e7d6ba9bea (patch) | |
tree | ee05de7a81bb03596b38c9d0fdd179c8fc4df0cf | |
parent | 53274d1a13fcd0e2f2ec7b1532a47bcaf120b081 (diff) | |
download | illumos-joyent-163bd69b3c164dda2a59c7f08ca788e7d6ba9bea.tar.gz |
38 need replacement for tr
Reviewed by: richlowe@richlowe.net
Approved by: garrett@nexenta.com
24 files changed, 1746 insertions, 7 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index ccf0a103b5..936a37f4d9 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -293,6 +293,7 @@ COMMON_SUBDIRS = \ cmd/th_tools \ cmd/tip \ cmd/touch \ + cmd/tr \ cmd/truss \ cmd/tty \ cmd/tzreload \ @@ -471,7 +472,6 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \ $(CLOSED)/cmd/pax \ $(CLOSED)/cmd/sed_xpg4 \ $(CLOSED)/cmd/tail \ - $(CLOSED)/cmd/tr_xpg4 \ $(CLOSED)/lib/libc_i18n i386_SUBDIRS= \ diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 8da4b5b22e..785f77f1d6 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -402,6 +402,7 @@ COMMON_SUBDIRS= \ touch \ tplot \ tput \ + tr \ trapstat \ troff \ true \ @@ -467,8 +468,7 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \ $(CLOSED)/cmd/printf \ $(CLOSED)/cmd/sed \ $(CLOSED)/cmd/sed_xpg4 \ - $(CLOSED)/cmd/tail \ - $(CLOSED)/cmd/tr_xpg4 + $(CLOSED)/cmd/tail i386_SUBDIRS= \ acpihpd \ @@ -749,8 +749,7 @@ $(CLOSED_BUILD)MSGSUBDIRS += \ $(CLOSED)/cmd/printf \ $(CLOSED)/cmd/sed \ $(CLOSED)/cmd/sed_xpg4 \ - $(CLOSED)/cmd/tail \ - $(CLOSED)/cmd/tr_xpg4 + $(CLOSED)/cmd/tail sparc_MSGSUBDIRS= \ fruadm \ diff --git a/usr/src/cmd/tr/Makefile b/usr/src/cmd/tr/Makefile new file mode 100644 index 0000000000..25ee89ac91 --- /dev/null +++ b/usr/src/cmd/tr/Makefile @@ -0,0 +1,70 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Use is subject to license terms. +# + + +PROG= tr +XPG4PROG= $(PROG) +XPG6PROG= $(PROG) + +OBJS= tr.o str.o cset.o cmap.o +SRCS= $(OBJS:%.o=%.c) + +include ../Makefile.cmd + +CLOBBERFILES= $(PROG) + + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all +CPPFLAGS += -D_ILLUMOS_PRIVATE -I. +LINTFLAGS += -D_ILLUMOS_PRIVATE -I. + +# install rules +$(ROOTINC)/% : % + $(INS.file) + +.KEEP_STATE: + +.PARALLEL: $(OBJS) + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +install: all .WAIT $(ROOTPROG) $(ROOTXPG4PROG) $(ROOTXPG6PROG) + +$(ROOTXPG4PROG) $(ROOTXPG6PROG): + -$(RM) $@ + -$(LN) -s ../../bin/$(PROG) $@ + +lint: lint_SRCS + +clean: + $(RM) $(OBJS) + +include ../Makefile.targ diff --git a/usr/src/cmd/tr/THIRDPARTYLICENSE b/usr/src/cmd/tr/THIRDPARTYLICENSE new file mode 100644 index 0000000000..12818221d3 --- /dev/null +++ b/usr/src/cmd/tr/THIRDPARTYLICENSE @@ -0,0 +1,88 @@ + +Copyright (c) 2004 Tim J. Robbins. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + + +Copyright (c) 1991, 1993 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + + +Copyright (c) 1988, 1993 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/usr/src/cmd/tr/THIRDPARTYLICENSE.descrip b/usr/src/cmd/tr/THIRDPARTYLICENSE.descrip new file mode 100644 index 0000000000..c68adb0034 --- /dev/null +++ b/usr/src/cmd/tr/THIRDPARTYLICENSE.descrip @@ -0,0 +1 @@ +TR UTILITY diff --git a/usr/src/cmd/tr/cmap.c b/usr/src/cmd/tr/cmap.c new file mode 100644 index 0000000000..d73ffde081 --- /dev/null +++ b/usr/src/cmd/tr/cmap.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * "Character map" ADT. Stores mappings between pairs of characters in a + * splay tree, with a lookup table cache to simplify looking up the first + * bunch of characters (which are presumably more common than others). + */ + +#include <assert.h> +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <wchar.h> +#include "cmap.h" + +static struct cmapnode *cmap_splay(struct cmapnode *, wint_t); + +/* + * cmap_alloc -- + * Allocate a character map. + */ +struct cmap * +cmap_alloc(void) +{ + struct cmap *cm; + + cm = malloc(sizeof (*cm)); + if (cm == NULL) + return (NULL); + cm->cm_root = NULL; + cm->cm_def = CM_DEF_SELF; + cm->cm_havecache = false; + cm->cm_min = cm->cm_max = 0; + return (cm); +} + +/* + * cmap_add -- + * Add a mapping from "from" to "to" to the map. 
+ */ +bool +cmap_add(struct cmap *cm, wint_t from, wint_t to) +{ + struct cmapnode *cmn, *ncmn; + + cm->cm_havecache = false; + + if (cm->cm_root == NULL) { + cmn = malloc(sizeof (*cmn)); + if (cmn == NULL) + return (false); + cmn->cmn_from = from; + cmn->cmn_to = to; + cmn->cmn_left = cmn->cmn_right = NULL; + cm->cm_root = cmn; + cm->cm_min = cm->cm_max = from; + return (true); + } + + cmn = cm->cm_root = cmap_splay(cm->cm_root, from); + + if (cmn->cmn_from == from) { + cmn->cmn_to = to; + return (true); + } + + ncmn = malloc(sizeof (*ncmn)); + if (ncmn == NULL) + return (false); + ncmn->cmn_from = from; + ncmn->cmn_to = to; + if (from < cmn->cmn_from) { + ncmn->cmn_left = cmn->cmn_left; + ncmn->cmn_right = cmn; + cmn->cmn_left = NULL; + } else { + ncmn->cmn_right = cmn->cmn_right; + ncmn->cmn_left = cmn; + cmn->cmn_right = NULL; + } + if (from < cm->cm_min) + cm->cm_min = from; + if (from > cm->cm_max) + cm->cm_max = from; + cm->cm_root = ncmn; + + return (true); +} + +/* + * cmap_lookup_hard -- + * Look up the mapping for a character without using the cache. + */ +wint_t +cmap_lookup_hard(struct cmap *cm, wint_t ch) +{ + + if (cm->cm_root != NULL) { + cm->cm_root = cmap_splay(cm->cm_root, ch); + if (cm->cm_root->cmn_from == ch) + return (cm->cm_root->cmn_to); + } + return (cm->cm_def == CM_DEF_SELF ? ch : cm->cm_def); +} + +/* + * cmap_cache -- + * Update the cache. + */ +void +cmap_cache(struct cmap *cm) +{ + wint_t ch; + + for (ch = 0; ch < CM_CACHE_SIZE; ch++) + cm->cm_cache[ch] = cmap_lookup_hard(cm, ch); + + cm->cm_havecache = true; +} + +/* + * cmap_default -- + * Change the value that characters without mappings map to, and + * return the old value. The special character value CM_DEF_SELF + * means characters map to themselves. 
+ */ +wint_t +cmap_default(struct cmap *cm, wint_t def) +{ + wint_t old; + + old = cm->cm_def; + cm->cm_def = def; + cm->cm_havecache = false; + return (old); +} + +static struct cmapnode * +cmap_splay(struct cmapnode *t, wint_t ch) +{ + struct cmapnode N, *l, *r, *y; + + /* + * Based on public domain code from Sleator. + */ + + assert(t != NULL); + + N.cmn_left = N.cmn_right = NULL; + l = r = &N; + for (;;) { + if (ch < t->cmn_from) { + if (t->cmn_left != NULL && + ch < t->cmn_left->cmn_from) { + y = t->cmn_left; + t->cmn_left = y->cmn_right; + y->cmn_right = t; + t = y; + } + if (t->cmn_left == NULL) + break; + r->cmn_left = t; + r = t; + t = t->cmn_left; + } else if (ch > t->cmn_from) { + if (t->cmn_right != NULL && + ch > t->cmn_right->cmn_from) { + y = t->cmn_right; + t->cmn_right = y->cmn_left; + y->cmn_left = t; + t = y; + } + if (t->cmn_right == NULL) + break; + l->cmn_right = t; + l = t; + t = t->cmn_right; + } else + break; + } + l->cmn_right = t->cmn_left; + r->cmn_left = t->cmn_right; + t->cmn_left = N.cmn_right; + t->cmn_right = N.cmn_left; + return (t); +} diff --git a/usr/src/cmd/tr/cmap.h b/usr/src/cmd/tr/cmap.h new file mode 100644 index 0000000000..98d344a1fd --- /dev/null +++ b/usr/src/cmd/tr/cmap.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CMAP_H +#define CMAP_H + +#include <limits.h> +#include <stdbool.h> +#include <wchar.h> + +struct cmapnode { + wint_t cmn_from; + wint_t cmn_to; + struct cmapnode *cmn_left; + struct cmapnode *cmn_right; +}; + +struct cmap { +#define CM_CACHE_SIZE 128 + wint_t cm_cache[CM_CACHE_SIZE]; + bool cm_havecache; + struct cmapnode *cm_root; +#define CM_DEF_SELF -2 + wint_t cm_def; + wint_t cm_min; + wint_t cm_max; +}; + +struct cmap *cmap_alloc(void); +bool cmap_add(struct cmap *, wint_t, wint_t); +wint_t cmap_lookup_hard(struct cmap *, wint_t); +void cmap_cache(struct cmap *); +wint_t cmap_default(struct cmap *, wint_t); + +#endif diff --git a/usr/src/cmd/tr/cset.c b/usr/src/cmd/tr/cset.c new file mode 100644 index 0000000000..203db18283 --- /dev/null +++ b/usr/src/cmd/tr/cset.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * "Set of characters" ADT implemented as a splay tree of extents, with + * a lookup table cache to simplify looking up the first bunch of + * characters (which are presumably more common than others). + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <wchar.h> +#include <wctype.h> +#include "cset.h" + +static struct csnode *cset_delete(struct csnode *, wchar_t); +static int cset_rangecmp(struct csnode *, wchar_t); +static struct csnode *cset_splay(struct csnode *, wchar_t); + +/* + * cset_alloc -- + * Allocate a set of characters. + */ +struct cset * +cset_alloc(void) +{ + struct cset *cs; + + if ((cs = malloc(sizeof (*cs))) == NULL) + return (NULL); + cs->cs_root = NULL; + cs->cs_classes = NULL; + cs->cs_havecache = false; + cs->cs_invert = false; + return (cs); +} + +/* + * cset_add -- + * Add a character to the set. 
+ */ +bool +cset_add(struct cset *cs, wchar_t ch) +{ + struct csnode *csn, *ncsn; + wchar_t oval; + + cs->cs_havecache = false; + + /* + * Inserting into empty tree; new item becomes the root. + */ + if (cs->cs_root == NULL) { + csn = malloc(sizeof (*cs->cs_root)); + if (csn == NULL) + return (false); + csn->csn_left = csn->csn_right = NULL; + csn->csn_min = csn->csn_max = ch; + cs->cs_root = csn; + return (true); + } + + /* + * Splay to check whether the item already exists, and otherwise, + * where we should put it. + */ + csn = cs->cs_root = cset_splay(cs->cs_root, ch); + + /* + * Avoid adding duplicate nodes. + */ + if (cset_rangecmp(csn, ch) == 0) + return (true); + + /* + * Allocate a new node and make it the new root. + */ + ncsn = malloc(sizeof (*ncsn)); + if (ncsn == NULL) + return (false); + ncsn->csn_min = ncsn->csn_max = ch; + if (cset_rangecmp(csn, ch) < 0) { + ncsn->csn_left = csn->csn_left; + ncsn->csn_right = csn; + csn->csn_left = NULL; + } else { + ncsn->csn_right = csn->csn_right; + ncsn->csn_left = csn; + csn->csn_right = NULL; + } + cs->cs_root = ncsn; + + /* + * Coalesce with left and right neighbours if possible. + */ + if (ncsn->csn_left != NULL) { + ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); + if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { + oval = ncsn->csn_left->csn_min; + ncsn->csn_left = cset_delete(ncsn->csn_left, + ncsn->csn_left->csn_min); + ncsn->csn_min = oval; + } + } + if (ncsn->csn_right != NULL) { + ncsn->csn_right = cset_splay(ncsn->csn_right, + ncsn->csn_max + 1); + if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { + oval = ncsn->csn_right->csn_max; + ncsn->csn_right = cset_delete(ncsn->csn_right, + ncsn->csn_right->csn_min); + ncsn->csn_max = oval; + } + } + + return (true); +} + +/* + * cset_in_hard -- + * Determine whether a character is in the set without using + * the cache. 
+ */ +bool +cset_in_hard(struct cset *cs, wchar_t ch) +{ + struct csclass *csc; + + for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) + if ((csc->csc_invert ^ iswctype(ch, csc->csc_type)) != 0) + return (cs->cs_invert ^ true); + if (cs->cs_root != NULL) { + cs->cs_root = cset_splay(cs->cs_root, ch); + return ((cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch)) == 0); + } + return (cs->cs_invert ^ false); +} + +/* + * cset_cache -- + * Update the cache. + */ +void +cset_cache(struct cset *cs) +{ + wchar_t i; + + for (i = 0; i < CS_CACHE_SIZE; i++) + cs->cs_cache[i] = cset_in_hard(cs, i); + + cs->cs_havecache = true; +} + +/* + * cset_invert -- + * Invert the character set. + */ +void +cset_invert(struct cset *cs) +{ + + cs->cs_invert ^= true; + cs->cs_havecache = false; +} + +/* + * cset_addclass -- + * Add a wctype()-style character class to the set, optionally + * inverting it. + */ +bool +cset_addclass(struct cset *cs, wctype_t type, bool invert) +{ + struct csclass *csc; + + csc = malloc(sizeof (*csc)); + if (csc == NULL) + return (false); + csc->csc_type = type; + csc->csc_invert = invert; + csc->csc_next = cs->cs_classes; + cs->cs_classes = csc; + cs->cs_havecache = false; + return (true); +} + +static int +cset_rangecmp(struct csnode *t, wchar_t ch) +{ + + if (ch < t->csn_min) + return (-1); + if (ch > t->csn_max) + return (1); + return (0); +} + +static struct csnode * +cset_splay(struct csnode *t, wchar_t ch) +{ + struct csnode N, *l, *r, *y; + + /* + * Based on public domain code from Sleator. 
+ */ + + assert(t != NULL); + + N.csn_left = N.csn_right = NULL; + l = r = &N; + for (;;) { + if (cset_rangecmp(t, ch) < 0) { + if (t->csn_left != NULL && + cset_rangecmp(t->csn_left, ch) < 0) { + y = t->csn_left; + t->csn_left = y->csn_right; + y->csn_right = t; + t = y; + } + if (t->csn_left == NULL) + break; + r->csn_left = t; + r = t; + t = t->csn_left; + } else if (cset_rangecmp(t, ch) > 0) { + if (t->csn_right != NULL && + cset_rangecmp(t->csn_right, ch) > 0) { + y = t->csn_right; + t->csn_right = y->csn_left; + y->csn_left = t; + t = y; + } + if (t->csn_right == NULL) + break; + l->csn_right = t; + l = t; + t = t->csn_right; + } else + break; + } + l->csn_right = t->csn_left; + r->csn_left = t->csn_right; + t->csn_left = N.csn_right; + t->csn_right = N.csn_left; + return (t); +} + +static struct csnode * +cset_delete(struct csnode *t, wchar_t ch) +{ + struct csnode *x; + + assert(t != NULL); + t = cset_splay(t, ch); + assert(cset_rangecmp(t, ch) == 0); + if (t->csn_left == NULL) + x = t->csn_right; + else { + x = cset_splay(t->csn_left, ch); + x->csn_right = t->csn_right; + } + free(t); + return (x); +} diff --git a/usr/src/cmd/tr/cset.h b/usr/src/cmd/tr/cset.h new file mode 100644 index 0000000000..3cad575744 --- /dev/null +++ b/usr/src/cmd/tr/cset.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CSET_H +#define CSET_H + +#include <stdbool.h> +#include <wchar.h> +#include <wctype.h> + +struct csnode { + wchar_t csn_min; + wchar_t csn_max; + struct csnode *csn_left; + struct csnode *csn_right; +}; + +struct csclass { + wctype_t csc_type; + bool csc_invert; + struct csclass *csc_next; +}; + +struct cset { +#define CS_CACHE_SIZE 256 + bool cs_cache[CS_CACHE_SIZE]; + bool cs_havecache; + struct csclass *cs_classes; + struct csnode *cs_root; + bool cs_invert; +}; + +bool cset_addclass(struct cset *, wctype_t, bool); +struct cset *cset_alloc(void); +bool cset_add(struct cset *, wchar_t); +void cset_invert(struct cset *); +bool cset_in_hard(struct cset *, wchar_t); +void cset_cache(struct cset *); + +#endif /* CSET_H */ diff --git a/usr/src/cmd/tr/extern.h b/usr/src/cmd/tr/extern.h new file mode 100644 index 0000000000..43ec931dc5 --- /dev/null +++ b/usr/src/cmd/tr/extern.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <limits.h> + +#define NCHARS_SB (UCHAR_MAX + 1) /* Number of single-byte characters. */ +#define OOBCH -1 /* Out of band character value. 
*/ + +typedef struct { + enum { STRING1, STRING2 } which; + enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, + CCLASS, CCLASS_UPPER, CCLASS_LOWER, SET } state; + int cnt; /* character count */ + wint_t lastch; /* last character */ + wctype_t cclass; /* character class from wctype() */ + wint_t equiv[NCHARS_SB]; /* equivalence set */ + wint_t *set; /* set of characters */ + char *str; /* user's string */ +} STR; + +wint_t next(STR *); +int charcoll(const void *, const void *); diff --git a/usr/src/cmd/tr/str.c b/usr/src/cmd/tr/str.c new file mode 100644 index 0000000000..54395b9ccf --- /dev/null +++ b/usr/src/cmd/tr/str.c @@ -0,0 +1,399 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <wchar.h> +#include <wctype.h> + +#include "extern.h" + +static int backslash(STR *, int *); +static int bracket(STR *); +static void genclass(STR *); +static void genequiv(STR *); +static int genrange(STR *, int); +static void genseq(STR *); + +/* + * Step the string state machine by one character. Returns 0 once the + * string is exhausted (state EOS); otherwise returns 1 with the current + * character left in s->lastch. An undecodable multibyte sequence in + * NORMAL state is fatal. + */ +wint_t +next(s) + STR *s; +{ + int is_octal; + wint_t ch; + wchar_t wch; + size_t clen; + + switch (s->state) { + case EOS: + return (0); + case INFINITE: + return (1); + case NORMAL: + switch (*s->str) { + case '\0': + s->state = EOS; + return (0); + case '\\': + s->lastch = backslash(s, &is_octal); + break; + case '[': + if (bracket(s)) + return (next(s)); + /* FALLTHROUGH */ + default: + clen = mbrtowc(&wch, s->str, MB_LEN_MAX, NULL); + if (clen == (size_t)-1 || clen == (size_t)-2 || + clen == 0) { + (void) fprintf(stderr, "Illegal sequence.\n"); + exit(1); + } + is_octal = 0; + s->lastch = wch; + s->str += clen; + break; + } + + /* We can start a range at any time. 
*/ + if (s->str[0] == '-' && genrange(s, is_octal)) + return (next(s)); + return (1); + case RANGE: + if (s->cnt-- == 0) { + s->state = NORMAL; + return (next(s)); + } + ++s->lastch; + return (1); + case SEQUENCE: + if (s->cnt-- == 0) { + s->state = NORMAL; + return (next(s)); + } + return (1); + case CCLASS: + case CCLASS_UPPER: + case CCLASS_LOWER: + s->cnt++; + ch = nextwctype(s->lastch, s->cclass); + if (ch == -1) { + s->state = NORMAL; + return (next(s)); + } + s->lastch = ch; + return (1); + case SET: + if ((ch = s->set[s->cnt++]) == OOBCH) { + s->state = NORMAL; + return (next(s)); + } + s->lastch = ch; + return (1); + default: + return (0); + } + /* NOTREACHED */ +} + +static int +bracket(s) + STR *s; +{ + char *p; + + switch (s->str[1]) { + case ':': /* "[:class:]" */ + if ((p = strchr(s->str + 2, ']')) == NULL) + return (0); + if (*(p - 1) != ':' || p - s->str < 4) + goto repeat; + *(p - 1) = '\0'; + s->str += 2; + genclass(s); + s->str = p + 1; + return (1); + case '=': /* "[=equiv=]" */ + if ((p = strchr(s->str + 2, ']')) == NULL) + return (0); + if (*(p - 1) != '=' || p - s->str < 4) + goto repeat; + s->str += 2; + genequiv(s); + return (1); + default: /* "[\###*n]" or "[#*n]" */ + repeat: + if ((p = strpbrk(s->str + 2, "*]")) == NULL) + return (0); + if (p[0] != '*' || strchr(p, ']') == NULL) + return (0); + s->str += 1; + genseq(s); + return (1); + } + /* NOTREACHED */ +} + +static void +genclass(s) + STR *s; +{ + + if ((s->cclass = wctype(s->str)) == 0) + errx(1, "unknown class %s", s->str); + s->cnt = 0; + s->lastch = -1; /* incremented before check in next() */ + if (strcmp(s->str, "upper") == 0) + s->state = CCLASS_UPPER; + else if (strcmp(s->str, "lower") == 0) + s->state = CCLASS_LOWER; + else + s->state = CCLASS; +} + +static void +genequiv(s) + STR *s; +{ + int i, p, pri; + char src[2], dst[3]; + size_t clen; + wchar_t wc; + + if (*s->str == '\\') { + s->equiv[0] = backslash(s, NULL); + if (*s->str != '=') + errx(1, "misplaced equivalence 
equals sign"); + s->str += 2; + } else { + clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL); + if (clen == (size_t)-1 || clen == (size_t)-2 || clen == 0) { + errno = EILSEQ; + err(1, NULL); + } + s->equiv[0] = wc; + if (s->str[clen] != '=') + errx(1, "misplaced equivalence equals sign"); + s->str += clen + 2; + } + + /* + * Calculate the set of all characters in the same equivalence class + * as the specified character (they will have the same primary + * collation weights). + * XXX Knows too much about how strxfrm() is implemented. Assumes + * it fills the string with primary collation weight bytes. Only one- + * to-one mappings are supported. + * XXX Equivalence classes not supported in multibyte locales. + */ + src[0] = (char)s->equiv[0]; + src[1] = '\0'; + if (MB_CUR_MAX == 1 && strxfrm(dst, src, sizeof (dst)) == 1) { + pri = (unsigned char)*dst; + for (p = 1, i = 1; i < NCHARS_SB; i++) { + *src = i; + if (strxfrm(dst, src, sizeof (dst)) == 1 && pri && + pri == (unsigned char)*dst) + s->equiv[p++] = i; + } + s->equiv[p] = OOBCH; + } + + s->cnt = 0; + s->state = SET; + s->set = s->equiv; +} + +/* + * Try to start a range whose low end is s->lastch and whose high end follows + * the '-' at s->str. Returns 1 with s->state set to RANGE or SET, or 0 with + * s->str restored when the high end sorts before the low end, so that the + * caller treats the '-' as an ordinary character. + */ +static int +genrange(STR *s, int was_octal) +{ + int stopval, octal; + char *savestart; + int n, cnt, *p; + size_t clen; + wchar_t wc; + + octal = 0; + savestart = s->str; + if (*++s->str == '\\') + stopval = backslash(s, &octal); + else { + clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL); + if (clen == (size_t)-1 || clen == (size_t)-2) { + errno = EILSEQ; + err(1, NULL); + } + stopval = wc; + s->str += clen; + } + /* + * XXX Characters are not ordered according to collating sequence in + * multibyte locales. 
+ */ + if (octal || was_octal || MB_CUR_MAX > 1) { + if (stopval < s->lastch) { + s->str = savestart; + return (0); + } + s->cnt = stopval - s->lastch + 1; + s->state = RANGE; + --s->lastch; + return (1); + } + if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) { + s->str = savestart; + return (0); + } + /* + * Collating range: build an explicit, OOBCH-terminated member list and + * hang it off s->set. The STR pointer itself must stay intact, since + * s->lastch, s->cnt and s->state are still used below. + */ + p = malloc((NCHARS_SB + 1) * sizeof (int)); + if ((s->set = (void *)p) == NULL) + err(1, "genrange() malloc"); + for (cnt = 0; cnt < NCHARS_SB; cnt++) + if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= + 0 && + charcoll((const void *)&cnt, (const void *)&stopval) <= 0) + *p++ = cnt; + *p = OOBCH; + n = (int *)p - (int *)s->set; + + s->cnt = 0; + s->state = SET; + if (n > 1) + qsort(s->set, n, sizeof (*(s->set)), charcoll); + return (1); +} + +static void +genseq(s) + STR *s; +{ + char *ep; + wchar_t wc; + size_t clen; + + if (s->which == STRING1) + errx(1, "sequences only valid in string2"); + + if (*s->str == '\\') + s->lastch = backslash(s, NULL); + else { + clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL); + if (clen == (size_t)-1 || clen == (size_t)-2) { + errno = EILSEQ; + err(1, NULL); + } + s->lastch = wc; + s->str += clen; + } + if (*s->str != '*') + errx(1, "misplaced sequence asterisk"); + + switch (*++s->str) { + case '\\': + s->cnt = backslash(s, NULL); + break; + case ']': + s->cnt = 0; + ++s->str; + break; + default: + if (isdigit((uchar_t)*s->str)) { + s->cnt = strtol(s->str, &ep, 0); + if (*ep == ']') { + s->str = ep + 1; + break; + } + } + errx(1, "illegal sequence count"); + /* NOTREACHED */ + } + + s->state = s->cnt ? SEQUENCE : INFINITE; +} + +/* + * Translate \??? into a character. Up to 3 octal digits, if no digits either + * an escape code or a literal character. 
+ */ +static int +backslash(STR *s, int *is_octal) +{ + int ch, cnt, val; + + if (is_octal != NULL) + *is_octal = 0; + for (cnt = val = 0; ; ) { + ch = (uchar_t)*++s->str; + if (!isdigit(ch) || ch > '7') + break; + val = val * 8 + ch - '0'; + if (++cnt == 3) { + ++s->str; + break; + } + } + if (cnt) { + if (is_octal != NULL) + *is_octal = 1; + return (val); + } + if (ch != '\0') + ++s->str; + switch (ch) { + case 'a': /* escape characters */ + return ('\7'); + case 'b': + return ('\b'); + case 'f': + return ('\f'); + case 'n': + return ('\n'); + case 'r': + return ('\r'); + case 't': + return ('\t'); + case 'v': + return ('\13'); + case '\0': /* \" -> \ */ + s->state = EOS; + return ('\\'); + default: /* \x" -> x */ + return (ch); + } +} diff --git a/usr/src/cmd/tr/tr.c b/usr/src/cmd/tr/tr.c new file mode 100644 index 0000000000..4627e590a6 --- /dev/null +++ b/usr/src/cmd/tr/tr.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +#include <ctype.h> +#include <err.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> +#include <wctype.h> + +#include "cmap.h" +#include "cset.h" +#include "extern.h" + +STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; +STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; + +static struct cset *setup(char *, STR *, int, int); +static void usage(void); + +static wint_t +cmap_lookup(struct cmap *cm, wint_t from) +{ + + if (from < CM_CACHE_SIZE && cm->cm_havecache) + return (cm->cm_cache[from]); + return (cmap_lookup_hard(cm, from)); +} + +static wint_t +cmap_max(struct cmap *cm) +{ + return (cm->cm_max); +} + +static inline bool +cset_in(struct cset *cs, wchar_t ch) +{ + + if (ch < CS_CACHE_SIZE && cs->cs_havecache) + return (cs->cs_cache[ch]); + return (cset_in_hard(cs, ch)); +} + +int +main(int argc, char **argv) +{ + static int carray[NCHARS_SB]; + struct cmap *map; + struct cset *delete, *squeeze; + int n, *p; + int Cflag, cflag, dflag, sflag, isstring2; + wint_t ch, cnt, lastch; + + (void) setlocale(LC_ALL, ""); + + Cflag = 
cflag = dflag = sflag = 0; + while ((ch = getopt(argc, argv, "Ccdsu")) != -1) + switch ((char)ch) { + case 'C': + Cflag = 1; + cflag = 0; + break; + case 'c': + cflag = 1; + Cflag = 0; + break; + case 'd': + dflag = 1; + break; + case 's': + sflag = 1; + break; + case 'u': + setbuf(stdout, (char *)NULL); + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + switch (argc) { + case 0: + default: + usage(); + /* NOTREACHED */ + case 1: + isstring2 = 0; + break; + case 2: + isstring2 = 1; + break; + } + + /* + * tr -ds [-Cc] string1 string2 + * Delete all characters (or complemented characters) in string1. + * Squeeze all characters in string2. + */ + if (dflag && sflag) { + if (!isstring2) + usage(); + + delete = setup(argv[0], &s1, cflag, Cflag); + squeeze = setup(argv[1], &s2, 0, 0); + + for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) + if (!cset_in(delete, ch) && + (lastch != ch || !cset_in(squeeze, ch))) { + lastch = ch; + (void) putwchar(ch); + } + if (ferror(stdin)) + err(1, NULL); + exit(0); + } + + /* + * tr -d [-Cc] string1 + * Delete all characters (or complemented characters) in string1. + */ + if (dflag) { + if (isstring2) + usage(); + + delete = setup(argv[0], &s1, cflag, Cflag); + + while ((ch = getwchar()) != WEOF) + if (!cset_in(delete, ch)) + (void) putwchar(ch); + if (ferror(stdin)) + err(1, NULL); + exit(0); + } + + /* + * tr -s [-Cc] string1 + * Squeeze all characters (or complemented characters) in string1. + */ + if (sflag && !isstring2) { + squeeze = setup(argv[0], &s1, cflag, Cflag); + + for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) + if (lastch != ch || !cset_in(squeeze, ch)) { + lastch = ch; + (void) putwchar(ch); + } + if (ferror(stdin)) + err(1, NULL); + exit(0); + } + + /* + * tr [-Ccs] string1 string2 + * Replace all characters (or complemented characters) in string1 with + * the character in the same position in string2. If the -s option is + * specified, squeeze all the characters in string2. 
+ */ + if (!isstring2) + usage(); + + map = cmap_alloc(); + if (map == NULL) + err(1, NULL); + squeeze = cset_alloc(); + if (squeeze == NULL) + err(1, NULL); + + s1.str = argv[0]; + + if (Cflag || cflag) { + (void) cmap_default(map, OOBCH); + if ((s2.str = strdup(argv[1])) == NULL) + errx(1, "strdup(argv[1])"); + } else + s2.str = argv[1]; + + if (!next(&s2)) + errx(1, "empty string2"); + + /* + * For -s result will contain only those characters defined + * as the second characters in each of the toupper or tolower + * pairs. + */ + + /* If string2 runs out of characters, use the last one specified. */ + while (next(&s1)) { + again: + if (s1.state == CCLASS_LOWER && + s2.state == CCLASS_UPPER && + s1.cnt == 1 && s2.cnt == 1) { + do { + ch = towupper(s1.lastch); + (void) cmap_add(map, s1.lastch, ch); + if (sflag && iswupper(ch)) + (void) cset_add(squeeze, ch); + if (!next(&s1)) + goto endloop; + } while (s1.state == CCLASS_LOWER && s1.cnt > 1); + /* skip upper set */ + do { + if (!next(&s2)) + break; + } while (s2.state == CCLASS_UPPER && s2.cnt > 1); + goto again; + } else if (s1.state == CCLASS_UPPER && + s2.state == CCLASS_LOWER && + s1.cnt == 1 && s2.cnt == 1) { + do { + ch = towlower(s1.lastch); + (void) cmap_add(map, s1.lastch, ch); + if (sflag && iswlower(ch)) + (void) cset_add(squeeze, ch); + if (!next(&s1)) + goto endloop; + } while (s1.state == CCLASS_UPPER && s1.cnt > 1); + /* skip lower set */ + do { + if (!next(&s2)) + break; + } while (s2.state == CCLASS_LOWER && s2.cnt > 1); + goto again; + } else { + (void) cmap_add(map, s1.lastch, s2.lastch); + if (sflag) + (void) cset_add(squeeze, s2.lastch); + } + (void) next(&s2); + } +endloop: + if (cflag || (Cflag && MB_CUR_MAX > 1)) { + /* + * This is somewhat tricky: since the character set is + * potentially huge, we need to avoid allocating a map + * entry for every character. 
Our strategy is to set the + * default mapping to the last character of string #2 + * (= the one that gets automatically repeated), then to + * add back identity mappings for characters that should + * remain unchanged. We don't waste space on identity mappings + * for non-characters with the -C option; those are simulated + * in the I/O loop. + */ + s2.str = argv[1]; + s2.state = NORMAL; + for (cnt = 0; cnt < WCHAR_MAX; cnt++) { + if (Cflag && !iswrune(cnt)) + continue; + if (cmap_lookup(map, cnt) == OOBCH) { + if (next(&s2)) + (void) cmap_add(map, cnt, s2.lastch); + if (sflag) + (void) cset_add(squeeze, s2.lastch); + } else + (void) cmap_add(map, cnt, cnt); + if ((s2.state == EOS || s2.state == INFINITE) && + cnt >= cmap_max(map)) + break; + } + (void) cmap_default(map, s2.lastch); + } else if (Cflag) { + for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { + if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) + *p++ = cnt; + else + (void) cmap_add(map, cnt, cnt); + } + n = p - carray; + if (Cflag && n > 1) + (void) qsort(carray, n, sizeof (*carray), charcoll); + + s2.str = argv[1]; + s2.state = NORMAL; + for (cnt = 0; cnt < n; cnt++) { + (void) next(&s2); + (void) cmap_add(map, carray[cnt], s2.lastch); + /* + * Chars taken from s2 can be different this time + * due to lack of complex upper/lower processing, + * so fill string2 again to not miss some. 
+ */ + if (sflag) + (void) cset_add(squeeze, s2.lastch); + } + } + + cset_cache(squeeze); + cmap_cache(map); + + if (sflag) + for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) { + if (!Cflag || iswrune(ch)) + ch = cmap_lookup(map, ch); + if (lastch != ch || !cset_in(squeeze, ch)) { + lastch = ch; + (void) putwchar(ch); + } + } + else + while ((ch = getwchar()) != WEOF) { + if (!Cflag || iswrune(ch)) + ch = cmap_lookup(map, ch); + (void) putwchar(ch); + } + if (ferror(stdin)) + err(1, NULL); + exit(0); +} + +static struct cset * +setup(char *arg, STR *str, int cflag, int Cflag) +{ + struct cset *cs; + + cs = cset_alloc(); + if (cs == NULL) + err(1, NULL); + str->str = arg; + while (next(str)) + (void) cset_add(cs, str->lastch); + if (Cflag) + (void) cset_addclass(cs, wctype("rune"), true); + if (cflag || Cflag) + cset_invert(cs); + cset_cache(cs); + return (cs); +} + +int +charcoll(const void *a, const void *b) +{ + static char sa[2], sb[2]; + + sa[0] = *(const int *)a; + sb[0] = *(const int *)b; + return (strcoll(sa, sb)); +} + +static void +usage(void) +{ + (void) fprintf(stderr, "%s\n%s\n%s\n%s\n", + "usage: tr [-Ccsu] string1 string2", + " tr [-Ccu] -d string1", + " tr [-Ccu] -s string1", + " tr [-Ccu] -ds string1 string2"); + exit(1); +} diff --git a/usr/src/head/wctype.h b/usr/src/head/wctype.h index 532f61aaf9..ae69a8e1f8 100644 --- a/usr/src/head/wctype.h +++ b/usr/src/head/wctype.h @@ -95,6 +95,14 @@ struct _wctype { wchar_t *code; /* conversion code */ }; + +#ifdef _ILLUMOS_PRIVATE +extern int __iswrune(wint_t); +extern wint_t __nextwctype(wint_t, wctype_t); +#define iswrune(c) __iswrune(c) +#define nextwctype(c, t) __nextwctype(c, t) +#endif + /* character classification functions */ /* iswascii is still a macro */ diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 3cec41db51..f714af9118 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -733,6 +733,7 @@ PORTLOCALE= \ mbstowcs.o \ 
mbtowc.o \ mskanji.o \ + nextwctype.o \ nl_langinfo.o \ none.o \ regcomp.o \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 6cb439b99c..5e4250afe6 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -775,6 +775,7 @@ PORTLOCALE= \ mbstowcs.o \ mbtowc.o \ mskanji.o \ + nextwctype.o \ nl_langinfo.o \ none.o \ regcomp.o \ diff --git a/usr/src/lib/libc/port/locale/iswctype.c b/usr/src/lib/libc/port/locale/iswctype.c index 03d90aa86e..e6b1140562 100644 --- a/usr/src/lib/libc/port/locale/iswctype.c +++ b/usr/src/lib/libc/port/locale/iswctype.c @@ -214,3 +214,24 @@ isnumber(wint_t wc) { return (__istype(wc, _CTYPE_N)); } + +/* + * FreeBSD has iswrune() for use by external programs, and this is used by + * the "tr" program. As that program is part of our consolidation, we + * provide an _ILLUMOS_PRIVATE version of this function that we can use. + * + * No programs that are not part of the illumos stack itself should use + * this function -- programs that do reference will not be portable to + * other versions of SunOS or Solaris. + */ +int +__iswrune(wint_t wc) +{ + /* + * Note, FreeBSD ignored the low order byte, as they encode their + * ctype values differently. We can't do that (ctype is baked into + * applications), but instead can just check if *any* bit is set in + * the ctype. Any bit being set indicates its a valid rune. + */ + return (__istype(wc, 0xffffffffU)); +} diff --git a/usr/src/lib/libc/port/locale/nextwctype.c b/usr/src/lib/libc/port/locale/nextwctype.c new file mode 100644 index 0000000000..54a9d2a07b --- /dev/null +++ b/usr/src/lib/libc/port/locale/nextwctype.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "lint.h" +#include "runetype.h" +#include <wchar.h> +#include <wctype.h> + +wint_t +__nextwctype(wint_t wc, wctype_t wct) +{ + size_t lim; + _RuneRange *rr = &_CurrentRuneLocale->__runetype_ext; + _RuneEntry *base, *re; + int noinc; + + noinc = 0; + if (wc < _CACHED_RUNES) { + wc++; + while (wc < _CACHED_RUNES) { + if (_CurrentRuneLocale->__runetype[wc] & wct) + return (wc); + wc++; + } + wc--; + } + if (rr->__ranges != NULL && wc < rr->__ranges[0].__min) { + wc = rr->__ranges[0].__min; + noinc = 1; + } + + /* Binary search -- see bsearch.c for explanation. 
*/ + base = rr->__ranges; + for (lim = rr->__nranges; lim != 0; lim >>= 1) { + re = base + (lim >> 1); + if (re->__min <= wc && wc <= re->__max) + goto found; + else if (wc > re->__max) { + base = re + 1; + lim--; + } + } + return (-1); +found: + if (!noinc) + wc++; + if (re->__min <= wc && wc <= re->__max) { + if (re->__types != NULL) { + for (; wc <= re->__max; wc++) + if (re->__types[wc - re->__min] & wct) + return (wc); + } else if (re->__map & wct) + return (wc); + } + while (++re < rr->__ranges + rr->__nranges) { + wc = re->__min; + if (re->__types != NULL) { + for (; wc <= re->__max; wc++) + if (re->__types[wc - re->__min] & wct) + return (wc); + } else if (re->__map & wct) + return (wc); + } + return (-1); +} diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index b0f99cab15..2d9136f6da 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -2496,6 +2496,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { __inf_written; __i_size; _isnanf { TYPE = FUNCTION; FILTER = libm.so.2 }; + __iswrune; __libc_threaded; _lib_version { FLAGS = NODIRECT }; _logb { TYPE = FUNCTION; FILTER = libm.so.2 }; @@ -2510,6 +2511,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { _modff { TYPE = FUNCTION; FILTER = libm.so.2 }; __nan_read; __nan_written; + __nextwctype; __nis_debug_bind; __nis_debug_calls; __nis_debug_file; diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index 07c5f2e8e6..55e6678e0a 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -803,6 +803,8 @@ PORTLOCALE= \ mbstowcs.o \ mbtowc.o \ mskanji.o \ + nextwctype.o \ + nl_langinfo.o \ none.o \ regcomp.o \ regfree.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index f7674e613c..4eb9eb903b 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -754,6 +754,8 @@ PORTLOCALE= \ mbstowcs.o \ mbtowc.o \ mskanji.o 
\ + nextwctype.o \ + nl_langinfo.o \ none.o \ regcomp.o \ regfree.o \ diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 3f864de738..3040fdf633 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -2565,6 +2565,7 @@ license cmd/script/THIRDPARTYLICENSE license=cmd/script/THIRDPARTYLICENSE license cmd/stat/vmstat/THIRDPARTYLICENSE \ license=cmd/stat/vmstat/THIRDPARTYLICENSE license cmd/tip/THIRDPARTYLICENSE license=cmd/tip/THIRDPARTYLICENSE +license cmd/tr/THIRDPARTYLICENSE license=cmd/tr/THIRDPARTYLICENSE license cmd/vi/THIRDPARTYLICENSE license=cmd/vi/THIRDPARTYLICENSE license cmd/which/THIRDPARTYLICENSE license=cmd/which/THIRDPARTYLICENSE license cmd/xstr/THIRDPARTYLICENSE license=cmd/xstr/THIRDPARTYLICENSE diff --git a/usr/src/pkg/manifests/system-xopen-xcu4.mf b/usr/src/pkg/manifests/system-xopen-xcu4.mf index 161afe8909..1bf6b0ec02 100644 --- a/usr/src/pkg/manifests/system-xopen-xcu4.mf +++ b/usr/src/pkg/manifests/system-xopen-xcu4.mf @@ -71,7 +71,6 @@ file path=usr/xpg4/bin/sed mode=0555 file path=usr/xpg4/bin/sort mode=0555 file path=usr/xpg4/bin/stty mode=0555 file path=usr/xpg4/bin/tail mode=0555 -file path=usr/xpg4/bin/tr mode=0555 file path=usr/xpg4/bin/who mode=0555 hardlink path=usr/xpg4/bin/bg target=../../../usr/xpg4/bin/alias hardlink path=usr/xpg4/bin/cd target=../../../usr/xpg4/bin/alias @@ -108,3 +107,4 @@ license lic_CDDL license=lic_CDDL license lic_OSBL license=lic_OSBL license lic_OSBL_preamble license=lic_OSBL_preamble link path=usr/xpg4/bin/ipcs target=../../bin/ipcs +link path=usr/xpg4/bin/tr target=../../bin/tr diff --git a/usr/src/pkg/manifests/system-xopen-xcu6.mf b/usr/src/pkg/manifests/system-xopen-xcu6.mf index ad4eb51ed7..6e704636c6 100644 --- a/usr/src/pkg/manifests/system-xopen-xcu6.mf +++ b/usr/src/pkg/manifests/system-xopen-xcu6.mf @@ -41,7 +41,6 @@ file path=usr/xpg6/bin/edit mode=0555 file path=usr/xpg6/bin/expr mode=0555 file path=usr/xpg6/bin/getconf 
mode=0555 file path=usr/xpg6/bin/ls mode=0555 -file path=usr/xpg6/bin/tr mode=0555 hardlink path=usr/xpg6/bin/ex target=../../../usr/xpg6/bin/edit hardlink path=usr/xpg6/bin/vedit target=../../../usr/xpg6/bin/edit hardlink path=usr/xpg6/bin/vi target=../../../usr/xpg6/bin/edit @@ -55,3 +54,4 @@ license cr_Sun license=cr_Sun license lic_CDDL license=lic_CDDL link path=usr/xpg6/bin/stty target=../../bin/stty link path=usr/xpg6/bin/xargs target=../../bin/xargs +link path=usr/xpg6/bin/tr target=../../bin/tr diff --git a/usr/src/tools/opensolaris/license-list b/usr/src/tools/opensolaris/license-list index e666fec819..618eca7113 100644 --- a/usr/src/tools/opensolaris/license-list +++ b/usr/src/tools/opensolaris/license-list @@ -63,6 +63,7 @@ usr/src/cmd/tbl/THIRDPARTYLICENSE usr/src/cmd/tcpd/THIRDPARTYLICENSE usr/src/cmd/terminfo/THIRDPARTYLICENSE usr/src/cmd/tip/THIRDPARTYLICENSE +usr/src/cmd/tr/THIRDPARTYLICENSE usr/src/cmd/ul/THIRDPARTYLICENSE usr/src/cmd/units/THIRDPARTYLICENSE usr/src/cmd/vgrind/THIRDPARTYLICENSE |