diff options
Diffstat (limited to 'usr/src/lib/libc/port/regex/regex.c')
-rw-r--r-- | usr/src/lib/libc/port/regex/regex.c | 1818 |
1 files changed, 1818 insertions, 0 deletions
diff --git a/usr/src/lib/libc/port/regex/regex.c b/usr/src/lib/libc/port/regex/regex.c new file mode 100644 index 0000000000..b763a915b0 --- /dev/null +++ b/usr/src/lib/libc/port/regex/regex.c @@ -0,0 +1,1818 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * IMPORTANT NOTE: + * + * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. + * IT IS **NOT** CHARACTER SET INDEPENDENT. + * + */ + +#pragma weak regex = _regex + +/* CONSTANTS SHARED WITH regcmp() */ +#include "regex.h" + +#include "lint.h" +#include "mtlib.h" +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <thread.h> +#include <widec.h> +#include "tsd.h" + + +/* PRIVATE CONSTANTS */ + +#define ADD_256_TO_GROUP_LENGTH 0x1 +#define ADD_512_TO_GROUP_LENGTH 0x2 +#define ADD_768_TO_GROUP_LENGTH 0x3 +#define ADDED_LENGTH_BITS 0x3 +#define SINGLE_BYTE_MASK 0xff +#define STRINGP_STACK_SIZE 50 + + +/* PRIVATE TYPE DEFINITIONS */ + +typedef enum { + NOT_IN_CLASS = 0, + IN_CLASS +} char_test_condition_t; + +typedef enum { + TESTING_CHAR = 0, + CONDITION_TRUE, + CONDITION_FALSE, + CHAR_TEST_ERROR +} char_test_result_t; + + +/* PRIVATE GLOBAL VARIABLES */ + +static mutex_t regex_lock = DEFAULTMUTEX; +static int return_arg_number[NSUBSTRINGS]; +static const char *substring_endp[NSUBSTRINGS]; +static const char *substring_startp[NSUBSTRINGS]; +static const char *stringp_stack[STRINGP_STACK_SIZE]; +static const char **stringp_stackp; + + +/* DECLARATIONS OF PRIVATE FUNCTIONS */ + +static int +get_wchar(wchar_t *wcharp, + const char *stringp); + +static void +get_match_counts(int *nmust_matchp, + int *nextra_matches_allowedp, + const char *count_stringp); + +static boolean_t +in_wchar_range(wchar_t test_char, + wchar_t lower_char, + wchar_t upper_char); + +static const char * +pop_stringp(void); + +static const char * +previous_charp(const char *current_charp); + +static const char * +push_stringp(const char *stringp); + +static char_test_result_t +test_char_against_ascii_class(char test_char, + const char *classp, + char_test_condition_t test_condition); + +static char_test_result_t +test_char_against_multibyte_class(wchar_t test_char, + const char *classp, + char_test_condition_t test_condition); + + +/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ + +static char_test_result_t +test_char_against_old_ascii_class(char test_char, + const char *classp, + char_test_condition_t test_condition); + +static const char * +test_repeated_ascii_char(const char *repeat_startp, + const char *stringp, + const char *regexp); + +static const char * +test_repeated_multibyte_char(const char *repeat_startp, + const char *stringp, + const char *regexp); + +static const char * +test_repeated_group(const char *repeat_startp, + const char *stringp, + const char *regexp); + +static const char * +test_string(const char *stringp, + const char *regexp); + + +/* DEFINITIONS OF PUBLIC VARIABLES */ + +char *__loc1; + +/* + * reserve thread-specific storage for __loc1 + */ +char ** +____loc1(void) +{ + if (_thr_main()) + return (&__loc1); + return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); +} + +#define __loc1 (*(____loc1())) + +/* DEFINITION OF regex() */ + +extern char * +_regex(const char *regexp, + const char *stringp, ...) +{ + va_list arg_listp; + int char_size; + const char *end_of_matchp; + wchar_t regex_wchar; + char *return_argp[NSUBSTRINGS]; + char *returned_substringp; + int substringn; + const char *substringp; + wchar_t string_wchar; + + if (____loc1() == (char **)0) { + return ((char *)0); + } else { + lmutex_lock(®ex_lock); + __loc1 = (char *)0; + } + + if ((stringp == (char *)0) || (regexp == (char *)0)) { + lmutex_unlock(®ex_lock); + return ((char *)0); + } + + + /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ + + substringn = 0; + va_start(arg_listp, stringp); + while (substringn < NSUBSTRINGS) { + return_argp[substringn] = va_arg(arg_listp, char *); + substring_startp[substringn] = (char *)0; + return_arg_number[substringn] = -1; + substringn++; + } + va_end(arg_listp); + + + /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ + + end_of_matchp = (char *)0; + stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; + + if ((int)*regexp == (int)START_OF_STRING_MARK) { + + /* + * the match must start at the beginning of the string + */ + + __loc1 = (char *)stringp; + regexp++; + end_of_matchp = test_string(stringp, regexp); + + } else if ((int)*regexp == (int)ASCII_CHAR) { + + /* + * test a string against a regular expression + * that starts with a single ASCII character: + * + * move to each character in the string that matches + * the first character in the regular expression + * and test the remaining string + */ + + while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { + stringp++; + } + while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { + end_of_matchp = test_string(stringp, regexp); + if (end_of_matchp != (char *)0) { + __loc1 = (char *)stringp; + } else { + stringp++; + while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { + stringp++; + } + } + } + + } else if (!multibyte) { + + /* + * if the value of the "multibyte" macro defined in <euc.h> + * is false, regex() is running in an ASCII locale; + * test an ASCII string against an ASCII regular expression + * that doesn't start with a single ASCII character: + * + * move forward in the string one byte at a time, testing + * the remaining string against the regular expression + */ + + end_of_matchp = test_string(stringp, regexp); + while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { + stringp++; + end_of_matchp = test_string(stringp, regexp); + } + if (end_of_matchp != (char *)0) { + __loc1 = (char *)stringp; + } + + } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { + + /* + * test a multibyte string against a multibyte regular expression + * that starts with a single multibyte character: + * + * move to each character in the string that matches + * the first character in the regular expression + * and test the remaining string + */ + + (void) get_wchar(®ex_wchar, regexp + 1); + char_size = get_wchar(&string_wchar, stringp); + while ((string_wchar != regex_wchar) && (char_size > 0)) { + stringp += char_size; + char_size = get_wchar(&string_wchar, stringp); + } + while ((end_of_matchp == (char *)0) && (char_size > 0)) { + end_of_matchp = test_string(stringp, regexp); + if (end_of_matchp != (char *)0) { + __loc1 = (char *)stringp; + } else { + stringp += char_size; + char_size = get_wchar(&string_wchar, stringp); + while ((string_wchar != regex_wchar) && (char_size > 0)) { + stringp += char_size; + char_size = get_wchar(&string_wchar, stringp); + } + } + } + + } else { + + /* + * test a multibyte string against a multibyte regular expression + * that doesn't start with a single multibyte character + * + * move forward in the string one multibyte character at a time, + * testing the remaining string against the regular expression + */ + + end_of_matchp = test_string(stringp, regexp); + char_size = get_wchar(&string_wchar, stringp); + while ((end_of_matchp == (char *)0) && (char_size > 0)) { + stringp += char_size; + end_of_matchp = test_string(stringp, regexp); + char_size = get_wchar(&string_wchar, stringp); + } + if (end_of_matchp != (char *)0) { + __loc1 = (char *)stringp; + } + } + + /* + * Return substrings that matched subexpressions for which + * matching substrings are to be returned. + * + * NOTE: + * + * According to manual page regcmp(3G), regex() returns substrings + * that match subexpressions even when no substring matches the + * entire regular expression. + */ + + substringn = 0; + while (substringn < NSUBSTRINGS) { + substringp = substring_startp[substringn]; + if ((substringp != (char *)0) && + (return_arg_number[substringn] >= 0)) { + returned_substringp = + return_argp[return_arg_number[substringn]]; + if (returned_substringp != (char *)0) { + while (substringp < substring_endp[substringn]) { + *returned_substringp = (char)*substringp; + returned_substringp++; + substringp++; + } + *returned_substringp = '\0'; + } + } + substringn++; + } + lmutex_unlock(®ex_lock); + return ((char *)end_of_matchp); +} /* regex() */ + + +/* DEFINITIONS OF PRIVATE FUNCTIONS */ + +static int +get_wchar(wchar_t *wcharp, + const char *stringp) +{ + int char_size; + + if (stringp == (char *)0) { + char_size = 0; + *wcharp = (wchar_t)((unsigned int)'\0'); + } else if (*stringp == '\0') { + char_size = 0; + *wcharp = (wchar_t)((unsigned int)*stringp); + } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { + char_size = 1; + *wcharp = (wchar_t)((unsigned int)*stringp); + } else { + char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); + } + return (char_size); +} + +static void +get_match_counts(int *nmust_matchp, + int *nextra_matches_allowedp, + const char *count_stringp) +{ + int minimum_match_count; + int maximum_match_count; + + minimum_match_count = + (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); + *nmust_matchp = minimum_match_count; + + count_stringp++; + maximum_match_count = + (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); + if (maximum_match_count == (int)UNLIMITED) { + *nextra_matches_allowedp = (int)UNLIMITED; + } else { + *nextra_matches_allowedp = + maximum_match_count - minimum_match_count; + } + return; + +} /* get_match_counts() */ + +static boolean_t +in_wchar_range(wchar_t test_char, + wchar_t lower_char, + wchar_t upper_char) +{ + return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && + (lower_char <= test_char) && (test_char <= upper_char)) || + (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && + ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && + (lower_char <= test_char) && (test_char <= upper_char))); + +} /* in_wchar_range() */ + +static const char * +pop_stringp(void) +{ + const char *stringp; + + if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { + return ((char *)0); + } else { + stringp = *stringp_stackp; + stringp_stackp++; + return (stringp); + } +} + + +static const char * +previous_charp(const char *current_charp) +{ + /* + * returns the pointer to the previous character in + * a string of multibyte characters + */ + + const char *prev_cs0 = current_charp - 1; + const char *prev_cs1 = current_charp - eucw1; + const char *prev_cs2 = current_charp - eucw2 - 1; + const char *prev_cs3 = current_charp - eucw3 - 1; + const char *prev_charp; + + if ((unsigned char)*prev_cs0 <= 0x7f) { + prev_charp = prev_cs0; + } else if ((unsigned char)*prev_cs2 == SS2) { + prev_charp = prev_cs2; + } else if ((unsigned char)*prev_cs3 == SS3) { + prev_charp = prev_cs3; + } else { + prev_charp = prev_cs1; + } + return (prev_charp); + +} /* previous_charp() */ + +static const char * +push_stringp(const char *stringp) +{ + if (stringp_stackp <= &stringp_stack[0]) { + return ((char *)0); + } else { + stringp_stackp--; + *stringp_stackp = stringp; + return (stringp); + } +} + + +static char_test_result_t +test_char_against_ascii_class(char test_char, + const char *classp, + char_test_condition_t test_condition) +{ + /* + * tests a character for membership in an ASCII character class compiled + * by the internationalized version of regcmp(); + * + * NOTE: The internationalized version of regcmp() compiles + * the range a-z in an ASCII character class to aTHRUz. + */ + + int nbytes_to_check; + + nbytes_to_check = (int)*classp; + classp++; + nbytes_to_check--; + + while (nbytes_to_check > 0) { + if (test_char == *classp) { + if (test_condition == IN_CLASS) + return (CONDITION_TRUE); + else + return (CONDITION_FALSE); + } else if (*classp == THRU) { + if ((*(classp - 1) <= test_char) && + (test_char <= *(classp + 1))) { + if (test_condition == IN_CLASS) + return (CONDITION_TRUE); + else + return (CONDITION_FALSE); + } else { + classp += 2; + nbytes_to_check -= 2; + } + } else { + classp++; + nbytes_to_check--; + } + } + if (test_condition == NOT_IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } +} /* test_char_against_ascii_class() */ + +static char_test_result_t +test_char_against_multibyte_class(wchar_t test_char, + const char *classp, + char_test_condition_t test_condition) +{ + /* + * tests a character for membership in a multibyte character class; + * + * NOTE: The range a-z in a multibyte character class compiles to + * aTHRUz. + */ + + int char_size; + wchar_t current_char; + int nbytes_to_check; + wchar_t previous_char; + + nbytes_to_check = (int)*classp; + classp++; + nbytes_to_check--; + + char_size = get_wchar(¤t_char, classp); + if (char_size <= 0) { + return (CHAR_TEST_ERROR); + } else if (test_char == current_char) { + if (test_condition == IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } + } else { + classp += char_size; + nbytes_to_check -= char_size; + } + + while (nbytes_to_check > 0) { + previous_char = current_char; + char_size = get_wchar(¤t_char, classp); + if (char_size <= 0) { + return (CHAR_TEST_ERROR); + } else if (test_char == current_char) { + if (test_condition == IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } + } else if (current_char == THRU) { + classp += char_size; + nbytes_to_check -= char_size; + char_size = get_wchar(¤t_char, classp); + if (char_size <= 0) { + return (CHAR_TEST_ERROR); + } else if (in_wchar_range(test_char, previous_char, + current_char)) { + if (test_condition == IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } + } else { + classp += char_size; + nbytes_to_check -= char_size; + } + } else { + classp += char_size; + nbytes_to_check -= char_size; + } + } + if (test_condition == NOT_IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } +} /* test_char_against_multibyte_class() */ + + +/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ + +static char_test_result_t +test_char_against_old_ascii_class(char test_char, + const char *classp, + char_test_condition_t test_condition) +{ + /* + * tests a character for membership in an ASCII character class compiled + * by the ASCII version of regcmp(); + * + * NOTE: ASCII versions of regcmp() compile the range a-z in an + * ASCII character class to THRUaz. The internationalized + * version compiles the same range to aTHRUz. + */ + + int nbytes_to_check; + + nbytes_to_check = (int)*classp; + classp++; + nbytes_to_check--; + + while (nbytes_to_check > 0) { + if (test_char == *classp) { + if (test_condition == IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } + } else if (*classp == THRU) { + if ((*(classp + 1) <= test_char) && + (test_char <= *(classp + 2))) { + if (test_condition == IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } + } else { + classp += 3; + nbytes_to_check -= 3; + } + } else { + classp++; + nbytes_to_check--; + } + } + if (test_condition == NOT_IN_CLASS) { + return (CONDITION_TRUE); + } else { + return (CONDITION_FALSE); + } +} /* test_char_against_old_ascii_class() */ + +static const char * +test_repeated_ascii_char(const char *repeat_startp, + const char *stringp, + const char *regexp) +{ + const char *end_of_matchp; + + end_of_matchp = test_string(stringp, regexp); + while ((end_of_matchp == (char *)0) && + (stringp > repeat_startp)) { + stringp--; + end_of_matchp = test_string(stringp, regexp); + } + return (end_of_matchp); +} + +static const char * +test_repeated_multibyte_char(const char *repeat_startp, + const char *stringp, + const char *regexp) +{ + const char *end_of_matchp; + + end_of_matchp = test_string(stringp, regexp); + while ((end_of_matchp == (char *)0) && + (stringp > repeat_startp)) { + stringp = previous_charp(stringp); + end_of_matchp = test_string(stringp, regexp); + } + return (end_of_matchp); +} + +static const char * +test_repeated_group(const char *repeat_startp, + const char *stringp, + const char *regexp) +{ + const char *end_of_matchp; + + end_of_matchp = test_string(stringp, regexp); + while ((end_of_matchp == (char *)0) && + (stringp > repeat_startp)) { + stringp = pop_stringp(); + if (stringp == (char *)0) { + return ((char *)0); + } + end_of_matchp = test_string(stringp, regexp); + } + return (end_of_matchp); +} + +static const char * +test_string(const char *stringp, + const char *regexp) +{ + /* + * returns a pointer to the first character following the first + * substring of the string addressed by stringp that matches + * the compiled regular expression addressed by regexp + */ + + unsigned int group_length; + int nextra_matches_allowed; + int nmust_match; + wchar_t regex_wchar; + int regex_char_size; + const char *repeat_startp; + unsigned int return_argn; + wchar_t string_wchar; + int string_char_size; + unsigned int substringn; + char_test_condition_t test_condition; + const char *test_stringp; + + for (;;) { + + /* + * Exit the loop via a return whenever there's a match + * or it's clear that there can be no match. + */ + + switch ((int)*regexp) { + + /* + * No fall-through. + * Each case ends with either a return or with stringp + * addressing the next character to be tested and regexp + * addressing the next compiled regular expression + * + * NOTE: The comments for each case give the meaning + * of the compiled regular expression decoded by the case + * and the character string that the compiled regular + * expression uses to encode the case. Each single + * character encoded in the compiled regular expression + * is shown enclosed in angle brackets (<>). Each + * compiled regular expression begins with a marker + * character which is shown as a named constant + * (e.g. <ASCII_CHAR>). Character constants are shown + * enclosed in single quotes (e.g. <'$'>). All other + * single characters encoded in the compiled regular + * expression are shown as lower case variable names + * (e.g. <ascii_char> or <multibyte_char>). Multicharacter + * strings encoded in the compiled regular expression + * are shown as variable names followed by elipses + * (e.g. <compiled_regex...>). + */ + + case ASCII_CHAR: /* single ASCII char */ + + /* encoded as <ASCII_CHAR><ascii_char> */ + + regexp++; + if (*regexp == *stringp) { + regexp++; + stringp++; + } else { + return ((char *)0); + } + break; /* end case ASCII_CHAR */ + + case MULTIBYTE_CHAR: /* single multibyte char */ + + /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ + + regexp++; + regex_char_size = get_wchar(®ex_wchar, regexp); + string_char_size = get_wchar(&string_wchar, stringp); + if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { + return ((char *)0); + } else { + regexp += regex_char_size; + stringp += string_char_size; + } + break; /* end case MULTIBYTE_CHAR */ + + case ANY_CHAR: /* any single ASCII or multibyte char */ + + /* encoded as <ANY_CHAR> */ + + if (!multibyte) { + if (*stringp == '\0') { + return ((char *)0); + } else { + regexp++; + stringp++; + } + } else { + string_char_size = get_wchar(&string_wchar, stringp); + if (string_char_size <= 0) { + return ((char *)0); + } else { + regexp++; + stringp += string_char_size; + } + } + break; /* end case ANY_CHAR */ + + case IN_ASCII_CHAR_CLASS: /* [.....] */ + case NOT_IN_ASCII_CHAR_CLASS: + + /* + * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> + * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + if ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + regexp += (int)*regexp; /* add the class length to regexp */ + stringp++; + } else { + return ((char *)0); + } + break; /* end case IN_ASCII_CHAR_CLASS */ + + case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ + case NOT_IN_MULTIBYTE_CHAR_CLASS: + + /* + * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> + * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + string_char_size = get_wchar(&string_wchar, stringp); + if ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, regexp, + test_condition) == CONDITION_TRUE)) { + regexp += (int)*regexp; /* add the class length to regexp */ + stringp += string_char_size; + } else { + return ((char *)0); + } + break; /* end case IN_MULTIBYTE_CHAR_CLASS */ + + case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ + case NOT_IN_OLD_ASCII_CHAR_CLASS: + + /* + * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> + * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + if ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + regexp += (int)*regexp; /* add the class length to regexp */ + stringp++; + } else { + return ((char *)0); + } + break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ + + case SIMPLE_GROUP: /* (.....) */ + + /* encoded as <SIMPLE_GROUP><group_length> */ + + regexp += 2; + break; /* end case SIMPLE_GROUP */ + + case END_GROUP: /* (.....) */ + + /* encoded as <END_GROUP><groupn> */ + + regexp += 2; + break; /* end case END_GROUP */ + + case SAVED_GROUP: /* (.....)$0-9 */ + + /* encoded as <SAVED_GROUP><substringn> */ + + regexp++; + substringn = (unsigned int)*regexp; + if (substringn >= NSUBSTRINGS) + return ((char *)0); + substring_startp[substringn] = stringp; + regexp++; + break; /* end case SAVED_GROUP */ + + case END_SAVED_GROUP: /* (.....)$0-9 */ + + /* + * encoded as <END_SAVED_GROUP><substringn>\ + * <return_arg_number[substringn]> + */ + + regexp++; + substringn = (unsigned int)*regexp; + if (substringn >= NSUBSTRINGS) + return ((char *)0); + substring_endp[substringn] = stringp; + regexp++; + return_argn = (unsigned int)*regexp; + if (return_argn >= NSUBSTRINGS) + return ((char *)0); + return_arg_number[substringn] = return_argn; + regexp++; + break; /* end case END_SAVED_GROUP */ + + case ASCII_CHAR|ZERO_OR_MORE: /* char* */ + + /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ + + regexp++; + repeat_startp = stringp; + while (*stringp == *regexp) { + stringp++; + } + regexp++; + return (test_repeated_ascii_char(repeat_startp, + stringp, regexp)); + + /* end case ASCII_CHAR|ZERO_OR_MORE */ + + case ASCII_CHAR|ONE_OR_MORE: /* char+ */ + + /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ + + regexp++; + if (*stringp != *regexp) { + return ((char *)0); + } else { + stringp++; + repeat_startp = stringp; + while (*stringp == *regexp) { + stringp++; + } + regexp++; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case ASCII_CHAR|ONE_OR_MORE */ + + case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ + + /* + * encoded as <ASCII_CHAR|COUNT><ascii_char>\ + * <minimum_match_count><maximum_match_count> + */ + + regexp++; + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + 1); + while ((*stringp == *regexp) && (nmust_match > 0)) { + nmust_match--; + stringp++; + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while (*stringp == *regexp) { + stringp++; + } + regexp += 3; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((*stringp == *regexp) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp++; + } + regexp += 3; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case ASCII_CHAR|COUNT */ + + case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ + + /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ + + regexp++; + regex_char_size = get_wchar(®ex_wchar, regexp); + repeat_startp = stringp; + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (string_wchar == regex_wchar)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += regex_char_size; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + + /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ + + case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ + + /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ + + regexp++; + regex_char_size = get_wchar(®ex_wchar, regexp); + string_char_size = get_wchar(&string_wchar, stringp); + if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { + return ((char *)0); + } else { + stringp += string_char_size; + repeat_startp = stringp; + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (string_wchar == regex_wchar)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += regex_char_size; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } + /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ + + case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ + + /* + * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ + * <minimum_match_count><maximum_match_count> + */ + + regexp++; + regex_char_size = get_wchar(®ex_wchar, regexp); + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + regex_char_size); + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (string_wchar == regex_wchar) && + (nmust_match > 0)) { + + nmust_match--; + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while ((string_char_size > 0) && + (string_wchar == regex_wchar)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += regex_char_size + 2; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((string_char_size > 0) && + (string_wchar == regex_wchar) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += regex_char_size + 2; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } + /* end case MULTIBYTE_CHAR|COUNT */ + + case ANY_CHAR|ZERO_OR_MORE: /* .* */ + + /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ + + repeat_startp = stringp; + if (!multibyte) { + while (*stringp != '\0') { + stringp++; + } + regexp++; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } else { + string_char_size = get_wchar(&string_wchar, stringp); + while (string_char_size > 0) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp++; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } + /* end case <ANY_CHAR|ZERO_OR_MORE> */ + + case ANY_CHAR|ONE_OR_MORE: /* .+ */ + + /* encoded as <ANY_CHAR|ONE_OR_MORE> */ + + if (!multibyte) { + if (*stringp == '\0') { + return ((char *)0); + } else { + stringp++; + repeat_startp = stringp; + while (*stringp != '\0') { + stringp++; + } + regexp++; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + } else { + string_char_size = get_wchar(&string_wchar, stringp); + if (string_char_size <= 0) { + return ((char *)0); + } else { + stringp += string_char_size; + repeat_startp = stringp; + string_char_size = get_wchar(&string_wchar, stringp); + while (string_char_size > 0) { + stringp += string_char_size; + string_char_size = + get_wchar(&string_wchar, stringp); + } + regexp++; + return (test_repeated_multibyte_char(repeat_startp, + stringp, regexp)); + } + } + /* end case <ANY_CHAR|ONE_OR_MORE> */ + + case ANY_CHAR|COUNT: /* .{min_count,max_count} */ + + /* + * encoded as <ANY_CHAR|COUNT>\ + * <minimum_match_count><maximum_match_count> + */ + + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + 1); + if (!multibyte) { + while ((*stringp != '\0') && (nmust_match > 0)) { + nmust_match--; + stringp++; + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while (*stringp != '\0') { + stringp++; + } + regexp += 3; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((*stringp != '\0') && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp++; + } + regexp += 3; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + } else { /* multibyte character */ + + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && (nmust_match > 0)) { + nmust_match--; + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while (string_char_size > 0) { + stringp += string_char_size; + string_char_size = + get_wchar(&string_wchar, stringp); + } + regexp += 3; + return (test_repeated_multibyte_char(repeat_startp, + stringp, regexp)); + } else { + repeat_startp = stringp; + while ((string_char_size > 0) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp += string_char_size; + string_char_size = + get_wchar(&string_wchar, stringp); + } + regexp += 3; + return (test_repeated_multibyte_char(repeat_startp, + stringp, regexp)); + } + } /* end case ANY_CHAR|COUNT */ + + case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ + case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: + + /* + * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + + /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ + + case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ + case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: + + /* + * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + if ((*stringp == '\0') || + (test_char_against_ascii_class(*stringp, regexp, + test_condition) != CONDITION_TRUE)) { + return ((char *)0); + } else { + stringp++; + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ + + case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ + case NOT_IN_ASCII_CHAR_CLASS | COUNT: + + /* + * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ + * <class ...><minimum_match_count>\ + * <maximum_match_count> + * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ + * <class ...><minimum_match_count>\ + * <maximum_match_count> + * + * NOTE: <class_length> includes the <class_length> byte, + * but not the <minimum_match_count> or + * <maximum_match_count> bytes + */ + + if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + (int)*regexp); + while ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE) && + (nmust_match > 0)) { + nmust_match--; + stringp++; + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp + 2; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp++; + } + regexp += (int)*regexp + 2; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_ASCII_CHAR_CLASS|COUNT */ + + case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ + case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: + + /* + * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == + (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + repeat_startp = stringp; + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, regexp, + test_condition) == CONDITION_TRUE)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + + /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ + + case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ + case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: + + /* + * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == + (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + string_char_size = get_wchar(&string_wchar, stringp); + if ((string_char_size <= 0) || + (test_char_against_multibyte_class(string_wchar, regexp, + test_condition) != CONDITION_TRUE)) { + return ((char *)0); + } else { + stringp += string_char_size; + repeat_startp = stringp; + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, + regexp, test_condition) == CONDITION_TRUE)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ + + case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ + case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: + + /* + * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ + * <class_length><class ...><min_count><max_count> + * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ + * <class_length><class ...><min_count><max_count> + * + * NOTE: <class_length> includes the <class_length> byte + * but not the <minimum_match_count> or + * <maximum_match_count> bytes + */ + + if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + (int)*regexp); + string_char_size = get_wchar(&string_wchar, stringp); + while ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, regexp, + test_condition) == CONDITION_TRUE) && + (nmust_match > 0)) { + nmust_match--; + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, + regexp, test_condition) == CONDITION_TRUE)) { + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += (int)*regexp + 2; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((string_char_size > 0) && + (test_char_against_multibyte_class(string_wchar, + regexp, test_condition) == CONDITION_TRUE) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp += string_char_size; + string_char_size = get_wchar(&string_wchar, stringp); + } + regexp += (int)*regexp + 2; + return (test_repeated_multibyte_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ + + case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ + case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: + + /* + * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class_length> includes the <class_length> byte + */ + + if ((int)*regexp == + (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + + /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ + + case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ + case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: + + /* + * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ + * <class_length><class ...> + * + * NOTE: <class length> includes the <class_length> byte + */ + + if ((int)*regexp == + (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + if ((*stringp == '\0') || + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) != CONDITION_TRUE)) { + return ((char *)0); + } else { + stringp++; + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp; /* add the class length to regexp */ + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ + + case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ + case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: + + /* + * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ + * <class ...><minimum_match_count>\ + * <maximum_match_count> + * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ + * <class_length><class ...><minimum_match_count>\ + * <maximum_match_count> + * + * NOTE: <class_length> includes the <class_length> byte + * but not the <minimum_match_count> or + * <maximum_match_count> bytes + */ + + if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { + test_condition = IN_CLASS; + } else { + test_condition = NOT_IN_CLASS; + } + regexp++; /* point to the <class_length> byte */ + + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + (int)*regexp); + while ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE) && + (nmust_match > 0)) { + nmust_match--; + stringp++; + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE)) { + stringp++; + } + regexp += (int)*regexp + 2; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((*stringp != '\0') && + (test_char_against_old_ascii_class(*stringp, regexp, + test_condition) == CONDITION_TRUE) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + stringp++; + } + regexp += (int)*regexp + 2; + return (test_repeated_ascii_char(repeat_startp, stringp, + regexp)); + } + /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ + + case ZERO_OR_MORE_GROUP: /* (.....)* */ + case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: + case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: + case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: + + /* + * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ + * <group_length><compiled_regex...>\ + * <END_GROUP|ZERO_OR_MORE><groupn> + * + * NOTE: + * + * group_length + (256 * ADDED_LENGTH_BITS) == + * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ + * <groupn>) + * + */ + + group_length = + (((unsigned int)*regexp & ADDED_LENGTH_BITS) << + TIMES_256_SHIFT); + regexp++; + group_length += (unsigned int)*regexp; + regexp++; + repeat_startp = stringp; + test_stringp = test_string(stringp, regexp); + while (test_stringp != (char *)0) { + if (push_stringp(stringp) == (char *)0) + return ((char *)0); + stringp = test_stringp; + test_stringp = test_string(stringp, regexp); + } + regexp += group_length; + return (test_repeated_group(repeat_startp, stringp, regexp)); + + /* end case ZERO_OR_MORE_GROUP */ + + case END_GROUP|ZERO_OR_MORE: /* (.....)* */ + + /* encoded as <END_GROUP|ZERO_OR_MORE> */ + + /* return from recursive call to test_string() */ + + return ((char *)stringp); + + /* end case END_GROUP|ZERO_OR_MORE */ + + case ONE_OR_MORE_GROUP: /* (.....)+ */ + case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: + case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: + case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: + + /* + * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ + * <group_length><compiled_regex...>\ + * <END_GROUP|ONE_OR_MORE><groupn> + * + * NOTE: + * + * group_length + (256 * ADDED_LENGTH_BITS) == + * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ + * <groupn>) + */ + + group_length = + (((unsigned int)*regexp & ADDED_LENGTH_BITS) << + TIMES_256_SHIFT); + regexp++; + group_length += (unsigned int)*regexp; + regexp++; + stringp = test_string(stringp, regexp); + if (stringp == (char *)0) + return ((char *)0); + repeat_startp = stringp; + test_stringp = test_string(stringp, regexp); + while (test_stringp != (char *)0) { + if (push_stringp(stringp) == (char *)0) + return ((char *)0); + stringp = test_stringp; + test_stringp = test_string(stringp, regexp); + } + regexp += group_length; + return (test_repeated_group(repeat_startp, stringp, regexp)); + + /* end case ONE_OR_MORE_GROUP */ + + case END_GROUP|ONE_OR_MORE: /* (.....)+ */ + + /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ + + /* return from recursive call to test_string() */ + + return ((char *)stringp); + + /* end case END_GROUP|ONE_OR_MORE */ + + case COUNTED_GROUP: /* (.....){max_count,min_count} */ + case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: + case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: + case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: + + /* + * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ + * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ + * <minimum_match_count><maximum_match_count> + * + * NOTE: + * + * group_length + (256 * ADDED_LENGTH_BITS) == + * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) + * + * but does not include the <minimum_match_count> or + * <maximum_match_count> bytes + */ + + group_length = + (((unsigned int)*regexp & ADDED_LENGTH_BITS) << + TIMES_256_SHIFT); + regexp++; + group_length += (unsigned int)*regexp; + regexp++; + get_match_counts(&nmust_match, &nextra_matches_allowed, + regexp + group_length); + test_stringp = test_string(stringp, regexp); + while ((test_stringp != (char *)0) && (nmust_match > 0)) { + stringp = test_stringp; + nmust_match--; + test_stringp = test_string(stringp, regexp); + } + if (nmust_match > 0) { + return ((char *)0); + } else if (nextra_matches_allowed == UNLIMITED) { + repeat_startp = stringp; + while (test_stringp != (char *)0) { + if (push_stringp(stringp) == (char *)0) + return ((char *)0); + stringp = test_stringp; + test_stringp = test_string(stringp, regexp); + } + regexp += group_length + 2; + return (test_repeated_group(repeat_startp, stringp, + regexp)); + } else { + repeat_startp = stringp; + while ((test_stringp != (char *)0) && + (nextra_matches_allowed > 0)) { + nextra_matches_allowed--; + if (push_stringp(stringp) == (char *)0) + return ((char *)0); + stringp = test_stringp; + test_stringp = test_string(stringp, regexp); + } + regexp += group_length + 2; + return (test_repeated_group(repeat_startp, stringp, + regexp)); + } + /* end case COUNTED_GROUP */ + + case END_GROUP|COUNT: /* (.....){max_count,min_count} */ + + /* encoded as <END_GROUP|COUNT> */ + + /* return from recursive call to test_string() */ + + return (stringp); + + /* end case END_GROUP|COUNT */ + + case END_OF_STRING_MARK: + + /* encoded as <END_OF_STRING_MARK><END_REGEX> */ + + if (*stringp == '\0') { + regexp++; + } else { + return ((char *)0); + } + break; /* end case END_OF_STRING_MARK */ + + case END_REGEX: /* end of the compiled regular expression */ + + /* encoded as <END_REGEX> */ + + return (stringp); + + /* end case END_REGEX */ + + default: + + return ((char *)0); + + } /* end switch (*regexp) */ + + } /* end for (;;) */ + +} /* test_string() */ |