diff options
| author | Yuri Pankov <yuri.pankov@nexenta.com> | 2017-06-07 00:36:24 +0300 |
|---|---|---|
| committer | Hans Rosenfeld <hans.rosenfeld@joyent.com> | 2017-06-12 12:53:59 +0200 |
| commit | 490fea6b98d609ce9ae1306209c80bb255b84c95 (patch) | |
| tree | 01803ff49e90602b84920ab1e3a228afda34d0b9 | |
| parent | 7641c5eab2e15e87eafc318a09f2bd5c6f968607 (diff) | |
| download | illumos-joyent-490fea6b98d609ce9ae1306209c80bb255b84c95.tar.gz | |
8355 need libc regex tests
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
33 files changed, 1385 insertions, 42 deletions
diff --git a/usr/src/head/regex.h b/usr/src/head/regex.h index 8131d756cc..33e4bd7de1 100644 --- a/usr/src/head/regex.h +++ b/usr/src/head/regex.h @@ -19,6 +19,7 @@ * * CDDL HEADER END */ + /* * Copyright 2014 Garrett D'Amore <garrett@damore.org> * @@ -30,9 +31,9 @@ * Copyright 1989, 1994 by Mortice Kern Systems Inc. * All rights reserved. */ + /* - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright 2017 Nexenta Systems, Inc. */ #ifndef _REGEX_H @@ -73,32 +74,31 @@ typedef long wchar_t; typedef ssize_t regoff_t; /* regcomp flags */ -#define REG_BASIC 0x00 -#define REG_EXTENDED 0x01 /* Use Extended Regular Expressions */ -#define REG_NEWLINE 0x08 /* Treat \n as regular character */ -#define REG_ICASE 0x04 /* Ignore case in match */ -#define REG_NOSUB 0x02 /* Don't set subexpression */ -#define REG_EGREP 0x1000 /* running as egrep(1) */ - -/* non-standard flags - note that most of these are not supported */ -#define REG_DELIM 0x10 /* string[0] is delimiter */ -#define REG_DEBUG 0x20 /* Debug recomp and regexec */ -#define REG_ANCHOR 0x40 /* Implicit ^ and $ */ -#define REG_WORDS 0x80 /* \< and \> match word boundries */ - -/* FreeBSD additions */ -#define REG_DUMP 0x2000 -#define REG_PEND 0x4000 -#define REG_NOSPEC 0x8000 -#define REG_STARTEND 0x10000 +#define REG_BASIC 0x00000 +#define REG_EXTENDED 0x00001 /* Use Extended Regular Expressions */ +#define REG_NOSUB 0x00002 /* Don't set subexpression */ +#define REG_ICASE 0x00004 /* Ignore case in match */ +#define REG_NEWLINE 0x00008 /* Treat \n as regular character */ +#define REG_DELIM 0x00010 /* legacy, no effect */ +#define REG_DEBUG 0x00020 /* legacy, no effect */ +#define REG_ANCHOR 0x00040 /* legacy, no effect */ +#define REG_WORDS 0x00080 /* legacy, no effect */ +#define REG_EGREP 0x01000 /* legacy, no effect */ +#define REG_DUMP 0x02000 /* internal */ +#define REG_PEND 0x04000 /* NULs are ordinary characters */ +#define REG_NOSPEC 0x08000 /* no 
special characters */ /* internal flags */ -#define REG_MUST 0x100 /* check for regmust substring */ +#define REG_MUST 0x00100 /* legacy, no effect */ /* regexec flags */ -#define REG_NOTBOL 0x200 /* string is not BOL */ -#define REG_NOTEOL 0x400 /* string has no EOL */ -#define REG_NOOPT 0x800 /* don't do regmust optimization */ +#define REG_NOTBOL 0x00200 /* string is not BOL */ +#define REG_NOTEOL 0x00400 /* string has no EOL */ +#define REG_NOOPT 0x00800 /* legacy, no effect */ +#define REG_STARTEND 0x10000 /* match whole pattern */ +#define REG_TRACE 0x20000 /* tracing of execution */ +#define REG_LARGE 0x40000 /* force large representation */ +#define REG_BACKR 0x80000 /* force use of backref code */ /* regcomp and regexec return codes */ #define REG_OK 0 /* success (non-standard) */ @@ -118,12 +118,15 @@ typedef ssize_t regoff_t; #define REG_BADPAT 14 /* syntax error */ #define REG_BADBR 15 /* \{ \} contents bad */ #define REG_EFATAL 16 /* internal error, not POSIX.2 */ -#define REG_ECHAR 17 /* invalid mulitbyte character */ +#define REG_ECHAR 17 /* invalid multibyte character */ #define REG_STACK 18 /* backtrack stack overflow */ #define REG_ENOSYS 19 /* function not supported (XPG4) */ #define REG__LAST 20 /* first unused code */ #define REG_EBOL 21 /* ^ anchor and not BOL */ #define REG_EEOL 22 /* $ anchor and not EOL */ +#define REG_ATOI 255 /* convert name to number (!) */ +#define REG_ITOA 256 /* convert number to name (!) */ + #define _REG_BACKREF_MAX 9 /* Max # of subexp. backreference */ typedef struct { /* regcomp() data saved for regexec() */ @@ -151,15 +154,14 @@ typedef struct { /* - * Additional API and structs to support regular expression manipulations - * on wide characters. + * IEEE Std 1003.2 ("POSIX.2") regular expressions API. 
*/ extern int regcomp(regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, int); extern int regexec(const regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, - size_t, regmatch_t *_RESTRICT_KYWD, int); -extern size_t regerror(int, const regex_t *_RESTRICT_KYWD, - char *_RESTRICT_KYWD, size_t); + size_t, regmatch_t *_RESTRICT_KYWD, int); +extern size_t regerror(int, const regex_t *_RESTRICT_KYWD, char *_RESTRICT_KYWD, + size_t); extern void regfree(regex_t *); #ifdef __cplusplus diff --git a/usr/src/lib/libc/port/regex/engine.c b/usr/src/lib/libc/port/regex/engine.c index 7481545864..68cf24a5da 100644 --- a/usr/src/lib/libc/port/regex/engine.c +++ b/usr/src/lib/libc/port/regex/engine.c @@ -289,8 +289,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, } for (i = 1; i <= m->g->nsub; i++) m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; - /* NB: FreeBSD has REG_BACKR, we do not */ - if (!g->backrefs /* && !(m->eflags&REG_BACKR) */) { + if (!g->backrefs && !(m->eflags&REG_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { diff --git a/usr/src/lib/libc/port/regex/regerror.c b/usr/src/lib/libc/port/regex/regerror.c index 3915c4429b..92179feea6 100644 --- a/usr/src/lib/libc/port/regex/regerror.c +++ b/usr/src/lib/libc/port/regex/regerror.c @@ -44,6 +44,8 @@ #include "utils.h" #include "../gen/_libc_gettext.h" +static const char *regatoi(const regex_t *preg, char *localbuf); + #define RERR(x, msg) { x, #x, msg } static struct rerr { @@ -90,19 +92,35 @@ regerror(int errcode, const regex_t *_RESTRICT_KYWD preg, { struct rerr *r; size_t len; + int target = errcode &~ REG_ITOA; const char *s; + char convbuf[50]; - for (r = rerrs; r->code != 0; r++) - if (r->code == errcode) - break; + if (errcode == REG_ATOI) { + s = regatoi(preg, convbuf); + } else { + for (r = rerrs; r->code != 0; r++) { + if (r->code == target) + break; + } - s = _libc_gettext(r->explain); + if (errcode&REG_ITOA) { + if (r->code != 0) + (void) strcpy(convbuf, r->name); +
else + (void) sprintf(convbuf, "REG_0x%x", target); + assert(strlen(convbuf) < sizeof (convbuf)); + s = convbuf; + } else { + s = _libc_gettext(r->explain); + } + } len = strlen(s) + 1; if (errbuf_size > 0) { - if (errbuf_size > len) + if (errbuf_size > len) { (void) strcpy(errbuf, s); - else { + } else { (void) strncpy(errbuf, s, errbuf_size-1); errbuf[errbuf_size-1] = '\0'; } @@ -110,3 +128,22 @@ regerror(int errcode, const regex_t *_RESTRICT_KYWD preg, return (len); } + +/* + * regatoi - internal routine to implement REG_ATOI + */ +static const char * +regatoi(const regex_t *preg, char *localbuf) +{ + struct rerr *r; + + for (r = rerrs; r->code != 0; r++) { + if (strcmp(r->name, preg->re_endp) == 0) + break; + } + if (r->code == 0) + return ("0"); + + (void) sprintf(localbuf, "%d", r->code); + return (localbuf); +} diff --git a/usr/src/lib/libc/port/regex/regexec.c b/usr/src/lib/libc/port/regex/regexec.c index c1bc7b436a..188e9f08ed 100644 --- a/usr/src/lib/libc/port/regex/regexec.c +++ b/usr/src/lib/libc/port/regex/regexec.c @@ -218,11 +218,7 @@ regexec(const regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD string, if (MB_CUR_MAX > 1) return (mmatcher(g, string, nmatch, pmatch, eflags)); -#ifdef REG_LARGE else if (g->nstates <= CHAR_BIT*sizeof (states1) && !(eflags&REG_LARGE)) -#else - else if (g->nstates <= CHAR_BIT*sizeof (states1)) -#endif return (smatcher(g, string, nmatch, pmatch, eflags)); else return (lmatcher(g, string, nmatch, pmatch, eflags)); diff --git a/usr/src/pkg/manifests/system-test-libctest.mf b/usr/src/pkg/manifests/system-test-libctest.mf index 6cc5b2ebcb..2b2f3329f6 100644 --- a/usr/src/pkg/manifests/system-test-libctest.mf +++ b/usr/src/pkg/manifests/system-test-libctest.mf @@ -28,6 +28,8 @@ dir path=opt/libc-tests/cfg/symbols dir path=opt/libc-tests/runfiles dir path=opt/libc-tests/tests dir path=opt/libc-tests/tests/random +dir path=opt/libc-tests/tests/regex +dir path=opt/libc-tests/tests/regex/data dir
path=opt/libc-tests/tests/select dir path=opt/libc-tests/tests/symbols dir path=usr group=sys @@ -124,6 +126,27 @@ file path=opt/libc-tests/tests/random/inz_region mode=0555 file path=opt/libc-tests/tests/random/inz_split mode=0555 file path=opt/libc-tests/tests/random/inz_split_vpp mode=0555 file path=opt/libc-tests/tests/random/inz_vpp mode=0555 +file path=opt/libc-tests/tests/regex/data/anchor.in mode=0444 +file path=opt/libc-tests/tests/regex/data/backref.in mode=0444 +file path=opt/libc-tests/tests/regex/data/basic.in mode=0444 +file path=opt/libc-tests/tests/regex/data/bracket.in mode=0444 +file path=opt/libc-tests/tests/regex/data/c_comments.in mode=0444 +file path=opt/libc-tests/tests/regex/data/complex.in mode=0444 +file path=opt/libc-tests/tests/regex/data/error.in mode=0444 +file path=opt/libc-tests/tests/regex/data/meta.in mode=0444 +file path=opt/libc-tests/tests/regex/data/nospec.in mode=0444 +file path=opt/libc-tests/tests/regex/data/paren.in mode=0444 +file path=opt/libc-tests/tests/regex/data/regress.in mode=0444 +file path=opt/libc-tests/tests/regex/data/repet_bounded.in mode=0444 +file path=opt/libc-tests/tests/regex/data/repet_multi.in mode=0444 +file path=opt/libc-tests/tests/regex/data/repet_ordinary.in mode=0444 +file path=opt/libc-tests/tests/regex/data/startend.in mode=0444 +file path=opt/libc-tests/tests/regex/data/subexp.in mode=0444 +file path=opt/libc-tests/tests/regex/data/subtle.in mode=0444 +file path=opt/libc-tests/tests/regex/data/word_bound.in mode=0444 +file path=opt/libc-tests/tests/regex/data/zero.in mode=0444 +file path=opt/libc-tests/tests/regex/h_regex mode=0555 +file path=opt/libc-tests/tests/regex/t_regex mode=0555 file path=opt/libc-tests/tests/select/select.sh mode=0555 file path=opt/libc-tests/tests/select/select_test mode=0555 file path=opt/libc-tests/tests/strcoll-strxfrm-6907.32 mode=0555 @@ -180,6 +203,8 @@ hardlink path=opt/libc-tests/tests/symbols/unistd_h target=setup hardlink 
path=opt/libc-tests/tests/symbols/wchar_h target=setup hardlink path=opt/libc-tests/tests/symbols/wctype_h target=setup license lic_CDDL license=lic_CDDL +license usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE \ + license=usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE depend fmri=locale/de type=require depend fmri=locale/en type=require depend fmri=locale/en-extra type=require diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run index 149d696f03..93a394782b 100644 --- a/usr/src/test/libc-tests/runfiles/default.run +++ b/usr/src/test/libc-tests/runfiles/default.run @@ -57,6 +57,8 @@ outputdir = /var/tmp/test_results [/opt/libc-tests/tests/random/arc4random_preforksig] [/opt/libc-tests/tests/random/arc4key.ksh] +[/opt/libc-tests/tests/regex/t_regex] + [/opt/libc-tests/tests/select/select.sh] timeout = 600 diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile index e9e15616d0..160e9a9383 100644 --- a/usr/src/test/libc-tests/tests/Makefile +++ b/usr/src/test/libc-tests/tests/Makefile @@ -22,6 +22,7 @@ SUBDIRS = \ nl_langinfo \ priv_gettext \ random \ + regex \ select \ strerror \ symbols \ diff --git a/usr/src/test/libc-tests/tests/regex/Makefile b/usr/src/test/libc-tests/tests/regex/Makefile new file mode 100644 index 0000000000..41227d87b4 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/Makefile @@ -0,0 +1,88 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2017 Nexenta Systems, Inc. 
+# + +include $(SRC)/Makefile.master + +ROOTOPTPKG= $(ROOT)/opt/libc-tests +TESTDIR= $(ROOTOPTPKG)/tests/regex +DATADIR= $(TESTDIR)/data + +PROG= h_regex +OBJS= main.o \ + split.o + +SHPROG= t_regex + +DATA= anchor.in \ + backref.in \ + basic.in \ + bracket.in \ + c_comments.in \ + complex.in \ + error.in \ + meta.in \ + nospec.in \ + paren.in \ + regress.in \ + repet_bounded.in \ + repet_multi.in \ + repet_ordinary.in \ + startend.in \ + subexp.in \ + subtle.in \ + word_bound.in \ + zero.in + +include $(SRC)/cmd/Makefile.cmd + +CMDS= $(PROG:%=$(TESTDIR)/%) $(SHPROG:%=$(TESTDIR)/%) +$(CMDS):= FILEMODE=0555 +TESTDATA= $(DATA:%=$(DATADIR)/%) +$(TESTDATA):= FILEMODE=0444 + +CPPFLAGS += -I$(SRC)/lib/libc/port/regex + +.KEEP_STATE: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +$(CMDS): $(TESTDIR) + +$(TESTDATA): $(DATADIR) + +install: all $(CMDS) $(TESTDATA) + +clean: + $(RM) $(OBJS) + +lint: + +$(TESTDIR) $(DATADIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +$(TESTDIR)/%: %.sh + $(INS.rename) + +$(DATADIR)/%: data/% + $(INS.file) + +include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE b/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE new file mode 100644 index 0000000000..7a7e26d39b --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE @@ -0,0 +1,23 @@ +Copyright (c) 1993 The NetBSD Foundation, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE.descrip b/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE.descrip new file mode 100644 index 0000000000..e3d50f5ef3 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/THIRDPARTYLICENSE.descrip @@ -0,0 +1 @@ +LIBC REGEX TESTS diff --git a/usr/src/test/libc-tests/tests/regex/data/anchor.in b/usr/src/test/libc-tests/tests/regex/data/anchor.in new file mode 100644 index 0000000000..d145408247 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/anchor.in @@ -0,0 +1,33 @@ +# anchoring and REG_NEWLINE +^abc$ & abc abc +a^b - a^b +a^b b a^b a^b +a$b - a$b +a$b b a$b a$b +^ & abc @abc +$ & abc @ +^$ & "" @ +$^ - "" @ +\($\)\(^\) b "" @ +# stop retching, those are legitimate (although disgusting) +^^ - "" @ +$$ - "" @ +b$ & abNc +b$ &n abNc b +^b$ & aNbNc +^b$ &n aNbNc b +^$ &n aNNb @Nb +^$ n abc +^$ n abcN @ +$^ n aNNb @Nb +\($\)\(^\) bn aNNb @Nb +^^ n^ aNNb @Nb +$$ n aNNb @NN +^a ^ a +a$ $ a +^a ^n aNb +^b ^n aNb b +a$ $n bNa +b$ $n bNa b +a*(^b$)c* - b b +a*\(^b$\)c* b b b diff --git a/usr/src/test/libc-tests/tests/regex/data/backref.in b/usr/src/test/libc-tests/tests/regex/data/backref.in new file mode 100644 index 
0000000000..cc59b06e5f --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/backref.in @@ -0,0 +1,21 @@ +# back references, ugh +a\(b\)\2c bC ESUBREG +a\(b\1\)c bC ESUBREG +a\(b*\)c\1d b abbcbbd abbcbbd bb +a\(b*\)c\1d b abbcbd +a\(b*\)c\1d b abbcbbbd +^\(.\)\1 b abc +a\([bc]\)\1d b abcdabbd abbd b +a\(\([bc]\)\2\)*d b abbccd abbccd +a\(\([bc]\)\2\)*d b abbcbd +# actually, this next one probably ought to fail, but the spec is unclear +a\(\(b\)*\2\)*d b abbbd abbbd +# here is a case that no NFA implementation does right +\(ab*\)[ab]*\1 b ababaaa ababaaa a +# check out normal matching in the presence of back refs +\(a\)\1bcd b aabcd aabcd +\(a\)\1bc*d b aabcd aabcd +\(a\)\1bc*d b aabd aabd +\(a\)\1bc*d b aabcccd aabcccd +\(a\)\1bc*[ce]d b aabcccd aabcccd +^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd diff --git a/usr/src/test/libc-tests/tests/regex/data/basic.in b/usr/src/test/libc-tests/tests/regex/data/basic.in new file mode 100644 index 0000000000..d1e3aa9dad --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/basic.in @@ -0,0 +1,5 @@ +# basics +a & a a +abc & abc abc +abc|de - abc abc +a|b|c - abc a diff --git a/usr/src/test/libc-tests/tests/regex/data/bracket.in b/usr/src/test/libc-tests/tests/regex/data/bracket.in new file mode 100644 index 0000000000..53a0b20d3c --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/bracket.in @@ -0,0 +1,55 @@ +# brackets, and numerous perversions thereof +a[b]c & abc abc +a[ab]c & abc abc +a[^ab]c & adc adc +a[]b]c & a]c a]c +a[[b]c & a[c a[c +a[-b]c & a-c a-c +a[^]b]c & adc adc +a[^-b]c & adc adc +a[b-]c & a-c a-c +a[b &C EBRACK +a[] &C EBRACK +a[1-3]c & a2c a2c +a[3-1]c &C ERANGE +a[1-3-5]c &C ERANGE +a[[.-.]--]c & a-c a-c +a[1- &C ERANGE +a[[. &C EBRACK +a[[.x &C EBRACK +a[[.x. &C EBRACK +a[[.x.] 
&C EBRACK +a[[.x.]] & ax ax +a[[.x,.]] &C ECOLLATE +a[[.one.]]b & a1b a1b +a[[.notdef.]]b &C ECOLLATE +a[[.].]]b & a]b a]b +a[[:alpha:]]c & abc abc +a[[:notdef:]]c &C ECTYPE +a[[: &C EBRACK +a[[:alpha &C EBRACK +a[[:alpha:] &C EBRACK +a[[:alpha,:] &C ECTYPE +a[[:]:]]b &C ECTYPE +a[[:-:]]b &C ECTYPE +a[[:alph:]] &C ECTYPE +a[[:alphabet:]] &C ECTYPE +[[:alnum:]]+ - -%@a0X- a0X +[[:alpha:]]+ - -%@aX0- aX +[[:blank:]]+ - aSSTb SST +[[:cntrl:]]+ - aNTb NT +[[:digit:]]+ - a019b 019 +[[:graph:]]+ - Sa%bS a%b +[[:lower:]]+ - AabC ab +[[:print:]]+ - NaSbN aSb +[[:punct:]]+ - S%-&T %-& +[[:space:]]+ - aSNTb SNT +[[:upper:]]+ - aBCd BC +[[:xdigit:]]+ - p0f3Cq 0f3C +a[[=b=]]c & abc abc +a[[= &C EBRACK +a[[=b &C EBRACK +a[[=b= &C EBRACK +a[[=b=] &C EBRACK +a[[=b,=]] &C ECOLLATE +a[[=one=]]b & a1b a1b diff --git a/usr/src/test/libc-tests/tests/regex/data/c_comments.in b/usr/src/test/libc-tests/tests/regex/data/c_comments.in new file mode 100644 index 0000000000..ea3faf9ddf --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/c_comments.in @@ -0,0 +1,17 @@ +# Let's have some fun -- try to match a C comment. +# first the obvious, which looks okay at first glance... +/\*.*\*/ - /*x*/ /*x*/ +# but... +/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ +# okay, we must not match */ inside; try to do that... +/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ +/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ +# but... +/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ +# and a still fancier version, which does it right (I think)... 
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ +/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ diff --git a/usr/src/test/libc-tests/tests/regex/data/complex.in b/usr/src/test/libc-tests/tests/regex/data/complex.in new file mode 100644 index 0000000000..e1140588e7 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/complex.in @@ -0,0 +1,23 @@ +# complexities +a(((b)))c - abc abc +a(b|(c))d - abd abd +a(b*|c)d - abbd abbd +# just gotta have one DFA-buster, of course +a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and an inline expansion in case somebody gets tricky +a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and in case somebody just slips in an NFA... +a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights +# fish for anomalies as the number of states passes 32 +12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 +123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 +1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 +12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 +123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 +# and one really big one, beyond any plausible word width +1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 +# fish for problems as 
brackets go past 8 +[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm +[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo +[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq +[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq diff --git a/usr/src/test/libc-tests/tests/regex/data/error.in b/usr/src/test/libc-tests/tests/regex/data/error.in new file mode 100644 index 0000000000..ea106f1653 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/error.in @@ -0,0 +1,30 @@ +# certain syntax errors and non-errors +| C BADPAT +| b | | +* C BADRPT +* b * * ++ C BADRPT +? C BADRPT +"" &C BADPAT +() - abc @abc +\(\) b abc @abc +a||b C BADPAT +|ab C BADPAT +ab| C BADPAT +(|a)b C BADPAT +(a|)b C BADPAT +(*a) C BADRPT +(+a) C BADRPT +(?a) C BADRPT +({1}a) C BADRPT +\(\{1\}a\) bC BADRPT +(a|*b) C BADRPT +(a|+b) C BADRPT +(a|?b) C BADRPT +(a|{1}b) C BADRPT +^* C BADRPT +^* b * * +^+ C BADRPT +^? C BADRPT +^{1} C BADRPT +^\{1\} bC BADRPT diff --git a/usr/src/test/libc-tests/tests/regex/data/meta.in b/usr/src/test/libc-tests/tests/regex/data/meta.in new file mode 100644 index 0000000000..4533d3591b --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/meta.in @@ -0,0 +1,21 @@ +# metacharacters, backslashes +a.c & abc abc +a[bc]d & abd abd +a\*c & a*c a*c +a\\b & a\b a\b +a\\\*b & a\*b a\*b +a\bc & abc abc +a\ &C EESCAPE +a\\bc & a\bc a\bc +\{ bC BADRPT +a\[b & a[b a[b +a[b &C EBRACK +# trailing $ is a peculiar special case for the BRE code +a$ & a a +a$ & a$ +a\$ & a +a\$ & a$ a$ +a\\$ & a +a\\$ & a$ +a\\$ & a\$ +a\\$ & a\ a\ diff --git a/usr/src/test/libc-tests/tests/regex/data/nospec.in b/usr/src/test/libc-tests/tests/regex/data/nospec.in new file mode 100644 index 0000000000..d95f81d1d7 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/nospec.in @@ -0,0 +1,7 @@ +# plain strings, with the NOSPEC flag +abc m abc abc +abc m xabcy abc +abc m xyz +a*b m aba*b a*b +a*b m ab +"" mC BADPAT diff --git 
a/usr/src/test/libc-tests/tests/regex/data/paren.in b/usr/src/test/libc-tests/tests/regex/data/paren.in new file mode 100644 index 0000000000..c3e8a295fe --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/paren.in @@ -0,0 +1,19 @@ +# parentheses and perversions thereof +a(b)c - abc abc +a\(b\)c b abc abc +a( C EPAREN +a( b a( a( +a\( - a( a( +a\( bC EPAREN +a\(b bC EPAREN +a(b C EPAREN +a(b b a(b a(b +# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) +a) b EPAREN +) b EPAREN +# end gagging (in a just world, those *should* give EPAREN) +a) b a) a) +a\) bC EPAREN +\) bC EPAREN +a()b - ab ab +a\(\)b b ab ab diff --git a/usr/src/test/libc-tests/tests/regex/data/regress.in b/usr/src/test/libc-tests/tests/regex/data/regress.in new file mode 100644 index 0000000000..afd832a51c --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/regress.in @@ -0,0 +1,9 @@ +# past problems, and suspected problems +(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 +abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop +abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv +(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 +CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 +Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz +a?b - ab ab +-\{0,1\}[0-9]*$ b -5 -5 diff --git a/usr/src/test/libc-tests/tests/regex/data/repet_bounded.in b/usr/src/test/libc-tests/tests/regex/data/repet_bounded.in new file mode 100644 index 0000000000..ee6ff4cd19 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/repet_bounded.in @@ -0,0 +1,45 @@ +# the dreaded bounded repetitions +{ & { { +{abc & {abc {abc +{1 C BADRPT +{1} C BADRPT +a{b & a{b a{b +a{1}b - ab ab +a\{1\}b b ab ab +a{1,}b - ab ab +a\{1,\}b b ab ab +a{1,2}b - aab aab +a\{1,2\}b b aab aab +a{1 C EBRACE +a\{1 bC EBRACE +a{1a C EBRACE 
+a\{1a bC EBRACE +a{1a} C BADBR +a\{1a\} bC BADBR +a{,2} - a{,2} a{,2} +a\{,2\} bC BADBR +a{,} - a{,} a{,} +a\{,\} bC BADBR +a{1,x} C BADBR +a\{1,x\} bC BADBR +a{1,x C EBRACE +a\{1,x bC EBRACE +a{300} C BADBR +a\{300\} bC BADBR +a{1,0} C BADBR +a\{1,0\} bC BADBR +ab{0,0}c - abcac ac +ab\{0,0\}c b abcac ac +ab{0,1}c - abcac abc +ab\{0,1\}c b abcac abc +ab{0,3}c - abbcac abbc +ab\{0,3\}c b abbcac abbc +ab{1,1}c - acabc abc +ab\{1,1\}c b acabc abc +ab{1,3}c - acabc abc +ab\{1,3\}c b acabc abc +ab{2,2}c - abcabbc abbc +ab\{2,2\}c b abcabbc abbc +ab{2,4}c - abcabbc abbc +ab\{2,4\}c b abcabbc abbc +((a{1,10}){1,10}){1,10} - a a a,a diff --git a/usr/src/test/libc-tests/tests/regex/data/repet_multi.in b/usr/src/test/libc-tests/tests/regex/data/repet_multi.in new file mode 100644 index 0000000000..da97badde9 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/repet_multi.in @@ -0,0 +1,21 @@ +# multiple repetitions +a** &C BADRPT +a++ C BADRPT +a?? C BADRPT +a*+ C BADRPT +a*? C BADRPT +a+* C BADRPT +a+? C BADRPT +a?* C BADRPT +a?+ C BADRPT +a{1}{1} C BADRPT +a*{1} C BADRPT +a+{1} C BADRPT +a?{1} C BADRPT +a{1}* C BADRPT +a{1}+ C BADRPT +a{1}? 
C BADRPT +a*{b} - a{b} a{b} +a\{1\}\{1\} bC BADRPT +a*\{1\} bC BADRPT +a\{1\}* bC BADRPT diff --git a/usr/src/test/libc-tests/tests/regex/data/repet_ordinary.in b/usr/src/test/libc-tests/tests/regex/data/repet_ordinary.in new file mode 100644 index 0000000000..08bc286e5b --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/repet_ordinary.in @@ -0,0 +1,10 @@ +# ordinary repetitions +ab*c & abc abc +ab+c - abc abc +ab?c - abc abc +a\(*\)b b a*b a*b +a\(**\)b b ab ab +a\(***\)b bC BADRPT +*a b *a *a +**a b a a +***a bC BADRPT diff --git a/usr/src/test/libc-tests/tests/regex/data/startend.in b/usr/src/test/libc-tests/tests/regex/data/startend.in new file mode 100644 index 0000000000..c396e58ac4 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/startend.in @@ -0,0 +1,9 @@ +# check out the STARTEND option +[abc] &# a(b)c b +[abc] &# a(d)c +[abc] &# a(bc)d b +[abc] &# a(dc)d c +. &# a()c +b.*c &# b(bc)c bc +b.* &# b(bc)c bc +.*c &# b(bc)c bc diff --git a/usr/src/test/libc-tests/tests/regex/data/subexp.in b/usr/src/test/libc-tests/tests/regex/data/subexp.in new file mode 100644 index 0000000000..c7bcc06175 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/subexp.in @@ -0,0 +1,57 @@ +# subexpressions +a(b)(c)d - abcd abcd b,c +a(((b)))c - abc abc b,b,b +a(b|(c))d - abd abd b,- +a(b*|c|e)d - abbd abbd bb +a(b*|c|e)d - acd acd c +a(b*|c|e)d - ad ad @d +a(b?)c - abc abc b +a(b?)c - ac ac @c +a(b+)c - abc abc b +a(b+)c - abbbc abbbc bbb +a(b*)c - ac ac @c +(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de +# the regression tester only asks for 9 subexpressions +a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j +a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k +a([bc]?)c - abc abc b +a([bc]?)c - ac ac @c +a([bc]+)c - abc abc b +a([bc]+)c - abcc abcc bc +a([bc]+)bc - abcbc abcbc bc +a(bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abbb abbb bb +a(bbb+|bb+|b)bb - abbb abbb b 
+(.*).* - abcdef abcdef abcdef +(a*)* - bc @b @b + +# do we get the right subexpression when it is used more than once? +a(b|c)*d - ad ad - +a(b|c)*d - abcd abcd c +a(b|c)+d - abd abd b +a(b|c)+d - abcd abcd c +a(b|c?)+d - ad ad @d +a(b|c?)+d - abcd abcd @d +a(b|c){0,0}d - ad ad - +a(b|c){0,1}d - ad ad - +a(b|c){0,1}d - abd abd b +a(b|c){0,2}d - ad ad - +a(b|c){0,2}d - abcd abcd c +a(b|c){0,}d - ad ad - +a(b|c){0,}d - abcd abcd c +a(b|c){1,1}d - abd abd b +a(b|c){1,1}d - acd acd c +a(b|c){1,2}d - abd abd b +a(b|c){1,2}d - abcd abcd c +a(b|c){1,}d - abd abd b +a(b|c){1,}d - abcd abcd c +a(b|c){2,2}d - acbd acbd b +a(b|c){2,2}d - abcd abcd c +a(b|c){2,4}d - abcd abcd c +a(b|c){2,4}d - abcbd abcbd b +a(b|c){2,4}d - abcbcd abcbcd c +a(b|c){2,}d - abcd abcd c +a(b|c){2,}d - abcbd abcbd b +a(b+|((c)*))+d - abd abd @d,@d,- +a(b+|((c)*))+d - abcd abcd @d,@d,- diff --git a/usr/src/test/libc-tests/tests/regex/data/subtle.in b/usr/src/test/libc-tests/tests/regex/data/subtle.in new file mode 100644 index 0000000000..92d68bb9c2 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/subtle.in @@ -0,0 +1,21 @@ +# subtleties of matching +abc & xabcy abc +a\(b\)?c\1d b acd +aBc i Abc Abc +a[Bc]*d i abBCcd abBCcd +0[[:upper:]]1 &i 0a1 0a1 +0[[:lower:]]1 &i 0A1 0A1 +a[^b]c &i abc +a[^b]c &i aBc +a[^b]c &i adc adc +[a]b[c] - abc abc +[a]b[a] - aba aba +[abc]b[abc] - abc abc +[abc]b[abd] - abd abd +a(b?c)+d - accd accd +(wee|week)(knights|night) - weeknights weeknights +(we|wee|week|frob)(knights|night|day) - weeknights weeknights +a[bc]d - xyzaaabcaababdacd abd +a[ab]c - aaabc abc +abc s abc abc +a* & b @b diff --git a/usr/src/test/libc-tests/tests/regex/data/word_bound.in b/usr/src/test/libc-tests/tests/regex/data/word_bound.in new file mode 100644 index 0000000000..e09a329afd --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/word_bound.in @@ -0,0 +1,13 @@ +# word boundaries (ick) +[[:<:]]a & a a +[[:<:]]a & ba +[[:<:]]a & -a a +a[[:>:]] & a a +a[[:>:]] & ab 
+a[[:>:]] & a- a +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc +[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc +[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc +[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ +[[:<:]]a_b[[:>:]] & x_a_b diff --git a/usr/src/test/libc-tests/tests/regex/data/zero.in b/usr/src/test/libc-tests/tests/regex/data/zero.in new file mode 100644 index 0000000000..2786944eb2 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/data/zero.in @@ -0,0 +1,7 @@ +# cases involving NULs +aZb & a a +aZb &p a +aZb &p# (aZb) aZb +aZ*b &p# (ab) ab +a.b &# (aZb) aZb +a.* &# (aZb)c aZb diff --git a/usr/src/test/libc-tests/tests/regex/main.c b/usr/src/test/libc-tests/tests/regex/main.c new file mode 100644 index 0000000000..fa7a2dd408 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/main.c @@ -0,0 +1,514 @@ +/* + * Copyright (c) 1993 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <assert.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/types.h> + +#include "test_regex.h" + +char *progname; +int line = 0; +int status = 0; + +int copts = REG_EXTENDED; +int eopts = 0; +regoff_t startoff = 0; +regoff_t endoff = 0; + +static char empty = '\0'; + +static char *eprint(int); +static int efind(char *); + +/* + * main - do the simple case, hand off to regress() for regression + */ +int +main(int argc, char *argv[]) +{ + regex_t re; +#define NS 10 + regmatch_t subs[NS]; + char erbuf[100]; + int err; + size_t len; + int c; + int errflg = 0; + int i; + extern int optind; + extern char *optarg; + + progname = argv[0]; + + while ((c = getopt(argc, argv, "c:e:S:E:")) != -1) + switch (c) { + case 'c': /* compile options */ + copts = options('c', optarg); + break; + case 'e': /* execute options */ + eopts = options('e', optarg); + break; + case 'S': /* start offset */ + startoff = (regoff_t)atoi(optarg); + break; + case 'E': /* end offset */ + endoff = (regoff_t)atoi(optarg); + break; + case '?': + default: + errflg++; + break; + } + if (errflg) { + fprintf(stderr, "usage: %s ", progname); + fprintf(stderr, "[-c copt][-C][-d] [re]\n"); + exit(2); + } + + if (optind >= argc) { + regress(stdin); + exit(status); + } + + err = regcomp(&re, argv[optind++], copts); + if (err) { + len = regerror(err, &re, erbuf, sizeof (erbuf)); + fprintf(stderr, 
"error %s, %zd/%zd `%s'\n", + eprint(err), len, (size_t)sizeof (erbuf), erbuf); + exit(status); + } + + if (optind >= argc) { + regfree(&re); + exit(status); + } + + if (eopts&REG_STARTEND) { + subs[0].rm_so = startoff; + subs[0].rm_eo = strlen(argv[optind]) - endoff; + } + err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); + if (err) { + len = regerror(err, &re, erbuf, sizeof (erbuf)); + fprintf(stderr, "error %s, %zd/%zd `%s'\n", + eprint(err), len, (size_t)sizeof (erbuf), erbuf); + exit(status); + } + if (!(copts&REG_NOSUB)) { + len = (int)(subs[0].rm_eo - subs[0].rm_so); + if (subs[0].rm_so != -1) { + if (len != 0) { + printf("match `%.*s'\n", (int)len, + argv[optind] + subs[0].rm_so); + } else { + printf("match `'@%.1s\n", + argv[optind] + subs[0].rm_so); + } + } + for (i = 1; i < NS; i++) { + if (subs[i].rm_so != -1) { + printf("(%d) `%.*s'\n", i, + (int)(subs[i].rm_eo - subs[i].rm_so), + argv[optind] + subs[i].rm_so); + } + } + } + exit(status); +} + +/* + * regress - main loop of regression test + */ +void +regress(FILE *in) +{ + char inbuf[1000]; +#define MAXF 10 + char *f[MAXF]; + int nf; + int i; + char erbuf[100]; + size_t ne; + const char *badpat = "invalid regular expression"; +#define SHORT 10 + const char *bpname = "REG_BADPAT"; + regex_t re; + + while (fgets(inbuf, sizeof (inbuf), in) != NULL) { + line++; + if (inbuf[0] == '#' || inbuf[0] == '\n') + continue; /* NOTE CONTINUE */ + inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ + nf = split(inbuf, f, MAXF, "\t\t"); + if (nf < 3) { + fprintf(stderr, "bad input, line %d\n", line); + exit(1); + } + for (i = 0; i < nf; i++) + if (strcmp(f[i], "\"\"") == 0) + f[i] = &empty; + if (nf <= 3) + f[3] = NULL; + if (nf <= 4) + f[4] = NULL; + try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); + if (opt('&', f[1])) /* try with either type of RE */ + try(f[0], f[1], f[2], f[3], f[4], + options('c', f[1]) &~ REG_EXTENDED); + } + + ne = regerror(REG_BADPAT, NULL, erbuf, sizeof (erbuf)); + if 
(strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { + fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", + erbuf, badpat); + status = 1; + } + ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT); + if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || + ne != strlen(badpat)+1) { + fprintf(stderr, + "end: regerror() short test gave `%s' not `%.*s'\n", + erbuf, SHORT-1, badpat); + status = 1; + } + ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof (erbuf)); + if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { + fprintf(stderr, + "end: regerror() ITOA test gave `%s' not `%s'\n", + erbuf, bpname); + status = 1; + } + re.re_endp = bpname; + ne = regerror(REG_ATOI, &re, erbuf, sizeof (erbuf)); + if (atoi(erbuf) != (int)REG_BADPAT) { + fprintf(stderr, + "end: regerror() ATOI test gave `%s' not `%ld'\n", + erbuf, (long)REG_BADPAT); + status = 1; + } else if (ne != strlen(erbuf)+1) { + fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", + erbuf, (long)REG_BADPAT); + status = 1; + } +} + +/* + * try - try it, and report on problems + */ +void +try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts) +{ + regex_t re; +#define NSUBS 10 + regmatch_t subs[NSUBS]; +#define NSHOULD 15 + char *should[NSHOULD]; + int nshould; + char erbuf[100]; + int err; + int len; + const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; + int i; + char *grump; + char f0copy[1000]; + char f2copy[1000]; + + strcpy(f0copy, f0); + re.re_endp = (opts&REG_PEND) ? 
f0copy + strlen(f0copy) : NULL; + fixstr(f0copy); + err = regcomp(&re, f0copy, opts); + if (err != 0 && (!opt('C', f1) || err != efind(f2))) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof (erbuf)); + fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", + line, type, eprint(err), len, (int)sizeof (erbuf), erbuf); + status = 1; + } else if (err == 0 && opt('C', f1)) { + /* unexpected success */ + fprintf(stderr, "%d: %s should have given REG_%s\n", + line, type, f2); + status = 1; + err = 1; /* so we won't try regexec */ + } + + if (err != 0) { + regfree(&re); + return; + } + + strcpy(f2copy, f2); + fixstr(f2copy); + + if (options('e', f1)&REG_STARTEND) { + if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) + fprintf(stderr, "%d: bad STARTEND syntax\n", line); + subs[0].rm_so = strchr(f2, '(') - f2 + 1; + subs[0].rm_eo = strchr(f2, ')') - f2; + } + err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); + + if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof (erbuf)); + fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", + line, type, eprint(err), len, (int)sizeof (erbuf), erbuf); + status = 1; + } else if (err != 0) { + /* nothing more to check */ + } else if (f3 == NULL) { + /* unexpected success */ + fprintf(stderr, "%d: %s exec should have failed\n", line, type); + status = 1; + err = 1; /* just on principle */ + } else if (opts&REG_NOSUB) { + /* nothing more to check */ + } else if ((grump = check(f2, subs[0], f3)) != NULL) { + fprintf(stderr, "%d: %s %s\n", line, type, grump); + status = 1; + err = 1; + } + + if (err != 0 || f4 == NULL) { + regfree(&re); + return; + } + + for (i = 1; i < NSHOULD; i++) + should[i] = NULL; + nshould = split(f4, &should[1], NSHOULD-1, ","); + if (nshould == 0) { + nshould = 1; + should[1] = &empty; + } + for (i = 1; i < NSUBS; i++) { + grump = check(f2, subs[i], should[i]); + if (grump != NULL) { + fprintf(stderr, "%d: 
%s $%d %s\n", line, + type, i, grump); + status = 1; + err = 1; + } + } + + regfree(&re); +} + +/* + * options - pick options out of a regression-test string + */ +int +options(int type, char *s) +{ + char *p; + int o = (type == 'c') ? copts : eopts; + const char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; + + for (p = s; *p != '\0'; p++) + if (strchr(legal, *p) != NULL) + switch (*p) { + case 'b': + o &= ~REG_EXTENDED; + break; + case 'i': + o |= REG_ICASE; + break; + case 's': + o |= REG_NOSUB; + break; + case 'n': + o |= REG_NEWLINE; + break; + case 'm': + o &= ~REG_EXTENDED; + o |= REG_NOSPEC; + break; + case 'p': + o |= REG_PEND; + break; + case '^': + o |= REG_NOTBOL; + break; + case '$': + o |= REG_NOTEOL; + break; + case '#': + o |= REG_STARTEND; + break; + case 't': /* trace */ + o |= REG_TRACE; + break; + case 'l': /* force long representation */ + o |= REG_LARGE; + break; + case 'r': /* force backref use */ + o |= REG_BACKR; + break; + } + return (o); +} + +/* + * opt - is a particular option in a regression string? 
+ */ +int /* predicate */ +opt(int c, char *s) +{ + return (strchr(s, c) != NULL); +} + +/* + * fixstr - transform magic characters in strings + */ +void +fixstr(char *p) +{ + if (p == NULL) + return; + + for (; *p != '\0'; p++) + if (*p == 'N') + *p = '\n'; + else if (*p == 'T') + *p = '\t'; + else if (*p == 'S') + *p = ' '; + else if (*p == 'Z') + *p = '\0'; +} + +/* + * check - check a substring match + */ +char * +check(char *str, regmatch_t sub, char *should) +{ + int len; + int shlen; + char *p; + static char grump[500]; + char *at = NULL; + + if (should != NULL && strcmp(should, "-") == 0) + should = NULL; + if (should != NULL && should[0] == '@') { + at = should + 1; + should = &empty; + } + + /* check rm_so and rm_eo for consistency */ + if ((sub.rm_so > sub.rm_eo) || + (sub.rm_so == -1 && sub.rm_eo != -1) || + (sub.rm_so != -1 && sub.rm_eo == -1) || + (sub.rm_so != -1 && sub.rm_so < 0) || + (sub.rm_eo != -1 && sub.rm_eo < 0)) { + sprintf(grump, "start %ld end %ld", + (long)sub.rm_so, (long)sub.rm_eo); + return (grump); + } + + /* check for no match */ + if (sub.rm_so == -1) { + if (should == NULL) + return (NULL); + else { + sprintf(grump, "did not match"); + return (grump); + } + } + + /* check for in range */ + if (sub.rm_eo > (ssize_t)strlen(str)) { + sprintf(grump, "start %ld end %ld, past end of string", + (long)sub.rm_so, (long)sub.rm_eo); + return (grump); + } + + len = (int)(sub.rm_eo - sub.rm_so); + p = str + sub.rm_so; + + /* check for not supposed to match */ + if (should == NULL) { + sprintf(grump, "matched `%.*s'", len, p); + return (grump); + } + + /* check for wrong match */ + shlen = (int)strlen(should); + if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { + sprintf(grump, "matched `%.*s' instead", len, p); + return (grump); + } + if (shlen > 0) + return (NULL); + + /* check null match in right place */ + if (at == NULL) + return (NULL); + shlen = strlen(at); + if (shlen == 0) + shlen = 1; /* force check for end-of-string */ + if 
(strncmp(p, at, shlen) != 0) { + sprintf(grump, "matched null at `%.20s'", p); + return (grump); + } + return (NULL); +} + +/* + * eprint - convert error number to name + */ +static char * +eprint(int err) +{ + static char epbuf[100]; + size_t len; + + len = regerror(REG_ITOA|err, NULL, epbuf, sizeof (epbuf)); + assert(len <= sizeof (epbuf)); + return (epbuf); +} + +/* + * efind - convert error name to number + */ +static int +efind(char *name) +{ + static char efbuf[100]; + regex_t re; + + sprintf(efbuf, "REG_%s", name); + assert(strlen(efbuf) < sizeof (efbuf)); + re.re_endp = efbuf; + (void) regerror(REG_ATOI, &re, efbuf, sizeof (efbuf)); + return (atoi(efbuf)); +} diff --git a/usr/src/test/libc-tests/tests/regex/split.c b/usr/src/test/libc-tests/tests/regex/split.c new file mode 100644 index 0000000000..664b8375af --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/split.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 1993 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <regex.h> +#include <stdio.h> +#include <string.h> + +#include "test_regex.h" + +/* + * split - divide a string into fields, like awk split() + * + * returns number of fields, including overflow + * + * fields[] list is not NULL-terminated + * nfields number of entries available in fields[] + * sep "" white, "c" single char, "ab" [ab]+ + */ +int +split(char *string, char *fields[], int nfields, const char *sep) +{ + char *p = string; + char c; /* latest character */ + char sepc = *sep; + char sepc2; + int fn; + char **fp = fields; + const char *sepp; + int trimtrail; + + /* white space */ + if (sepc == '\0') { + while ((c = *p++) == ' ' || c == '\t') + continue; + p--; + trimtrail = 1; + sep = " \t"; /* note, code below knows this is 2 long */ + sepc = ' '; + } else + trimtrail = 0; + sepc2 = sep[1]; /* now we can safely pick this up */ + + /* catch empties */ + if (*p == '\0') + return (0); + + /* single separator */ + if (sepc2 == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + if (fn == 0) + break; + while ((c = *p++) != sepc) + if (c == '\0') + return (nfields - fn); + *(p-1) = '\0'; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + for (;;) { + while ((c = *p++) != sepc) + if (c == '\0') + return (fn); + fn++; + } + /* not reached */ + } + + /* two separators */ + if (sep[2] == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + while ((c = *p++) != sepc && c != 
sepc2) + if (c == '\0') { + if (trimtrail && **(fp-1) == '\0') + fn++; + return (nfields - fn); + } + if (fn == 0) + break; + *(p-1) = '\0'; + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + while (c != '\0') { + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + fn++; + while ((c = *p++) != '\0' && c != sepc && c != sepc2) + continue; + } + /* might have to trim trailing white space */ + if (trimtrail) { + p--; + while ((c = *--p) == sepc || c == sepc2) + continue; + p++; + if (*p != '\0') { + if (fn == nfields+1) + *p = '\0'; + fn--; + } + } + return (fn); + } + + /* n separators */ + fn = 0; + for (;;) { + if (fn < nfields) + *fp++ = p; + fn++; + for (;;) { + c = *p++; + if (c == '\0') + return (fn); + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc != '\0') /* it was a separator */ + break; + } + if (fn < nfields) + *(p-1) = '\0'; + for (;;) { + c = *p++; + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc == '\0') /* it wasn't a separator */ + break; + } + p--; + } + + /* not reached */ +} diff --git a/usr/src/test/libc-tests/tests/regex/t_regex.sh b/usr/src/test/libc-tests/tests/regex/t_regex.sh new file mode 100644 index 0000000000..990f13154c --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/t_regex.sh @@ -0,0 +1,25 @@ +#! /usr/bin/sh +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2017 Nexenta Systems, Inc. 
+# + +TESTDIR=$(dirname $0) +HREGEX=${TESTDIR}/h_regex + +for t in $TESTDIR/data/*.in; do + $HREGEX < $t || exit 1 + $HREGEX -el < $t || exit 1 + $HREGEX -er < $t || exit 1 +done diff --git a/usr/src/test/libc-tests/tests/regex/test_regex.h b/usr/src/test/libc-tests/tests/regex/test_regex.h new file mode 100644 index 0000000000..7a90ac4859 --- /dev/null +++ b/usr/src/test/libc-tests/tests/regex/test_regex.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* from main.c */ +void regress(FILE *); +void try(char *, char *, char *, char *, char *, int); +int options(int, char *); +int opt(int, char *); +void fixstr(char *); +char *check(char *, regmatch_t, char *); + +/* from split.c */ +int split(char *string, char *fields[], int nfields, const char *sep); |
