summaryrefslogtreecommitdiff
path: root/usr/src/lib/libc/port/regex/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libc/port/regex/regex.c')
-rw-r--r--usr/src/lib/libc/port/regex/regex.c1818
1 files changed, 1818 insertions, 0 deletions
diff --git a/usr/src/lib/libc/port/regex/regex.c b/usr/src/lib/libc/port/regex/regex.c
new file mode 100644
index 0000000000..b763a915b0
--- /dev/null
+++ b/usr/src/lib/libc/port/regex/regex.c
@@ -0,0 +1,1818 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * IMPORTANT NOTE:
+ *
+ * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
+ * IT IS **NOT** CHARACTER SET INDEPENDENT.
+ *
+ */
+
+#pragma weak regex = _regex
+
+/* CONSTANTS SHARED WITH regcmp() */
+#include "regex.h"
+
+#include "lint.h"
+#include "mtlib.h"
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <thread.h>
+#include <widec.h>
+#include "tsd.h"
+
+
+/*
+ * PRIVATE CONSTANTS
+ *
+ * SINGLE_BYTE_MASK keeps the low eight bits of a byte read from the
+ * compiled expression (see get_match_counts()).
+ *
+ * STRINGP_STACK_SIZE is the number of slots in stringp_stack[], the
+ * stack used by push_stringp() and pop_stringp().
+ *
+ * NOTE(review): the ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS
+ * are presumably flag bits that extend a one-byte group length by
+ * 256, 512 or 768, with ADDED_LENGTH_BITS masking them -- confirm
+ * against the group cases of test_string().
+ */
+
+#define ADD_256_TO_GROUP_LENGTH 0x1
+#define ADD_512_TO_GROUP_LENGTH 0x2
+#define ADD_768_TO_GROUP_LENGTH 0x3
+#define ADDED_LENGTH_BITS 0x3
+#define SINGLE_BYTE_MASK 0xff
+#define STRINGP_STACK_SIZE 50
+
+
+/*
+ * PRIVATE TYPE DEFINITIONS
+ *
+ * char_test_condition_t tells a character-class test which outcome
+ * counts as success: IN_CLASS succeeds when the character is a member
+ * of the class, NOT_IN_CLASS succeeds when it is not.
+ *
+ * char_test_result_t is the value the character-class tests return:
+ * CONDITION_TRUE or CONDITION_FALSE, or CHAR_TEST_ERROR when the
+ * multibyte class test cannot decode a class member.  None of the
+ * three class-test functions returns TESTING_CHAR.
+ */
+
+typedef enum {
+ NOT_IN_CLASS = 0,
+ IN_CLASS
+} char_test_condition_t;
+
+typedef enum {
+ TESTING_CHAR = 0,
+ CONDITION_TRUE,
+ CONDITION_FALSE,
+ CHAR_TEST_ERROR
+} char_test_result_t;
+
+
+/*
+ * PRIVATE GLOBAL VARIABLES
+ *
+ * All of the match state below is shared by every caller, so regex()
+ * holds regex_lock from just after its entry checks until it returns.
+ *
+ * substring_startp[n] / substring_endp[n] bracket the text matched by
+ * saved group n; regex() resets substring_startp[n] to null on entry.
+ *
+ * return_arg_number[n] is the index of the regex() vararg that receives
+ * saved group n; regex() resets it to -1 (nothing to return) on entry.
+ *
+ * stringp_stack[] is a stack of string positions that grows downward;
+ * stringp_stackp addresses the most recently pushed slot and equals
+ * &stringp_stack[STRINGP_STACK_SIZE] when the stack is empty.
+ */
+
+static mutex_t regex_lock = DEFAULTMUTEX;
+static int return_arg_number[NSUBSTRINGS];
+static const char *substring_endp[NSUBSTRINGS];
+static const char *substring_startp[NSUBSTRINGS];
+static const char *stringp_stack[STRINGP_STACK_SIZE];
+static const char **stringp_stackp;
+
+
+/*
+ * DECLARATIONS OF PRIVATE FUNCTIONS
+ *
+ * Forward declarations of the static helpers that are defined after
+ * regex() in this file.
+ */
+
+static int
+get_wchar(wchar_t *wcharp,
+ const char *stringp); /* decode one character, return its byte length */
+
+static void
+get_match_counts(int *nmust_matchp,
+ int *nextra_matches_allowedp,
+ const char *count_stringp); /* decode <min_count><max_count> bytes */
+
+static boolean_t
+in_wchar_range(wchar_t test_char,
+ wchar_t lower_char,
+ wchar_t upper_char); /* range test for a class member lower-upper */
+
+static const char *
+pop_stringp(void); /* null when the position stack is empty */
+
+static const char *
+previous_charp(const char *current_charp); /* step back one character */
+
+static const char *
+push_stringp(const char *stringp); /* null when the position stack is full */
+
+static char_test_result_t
+test_char_against_ascii_class(char test_char,
+ const char *classp,
+ char_test_condition_t test_condition);
+
+static char_test_result_t
+test_char_against_multibyte_class(wchar_t test_char,
+ const char *classp,
+ char_test_condition_t test_condition);
+
+
+/*
+ * FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp()
+ *
+ * Only test_char_against_old_ascii_class() is compatibility code; the
+ * test_repeated_*() and test_string() declarations that follow it are
+ * part of the normal matching path.
+ */
+
+static char_test_result_t
+test_char_against_old_ascii_class(char test_char,
+ const char *classp,
+ char_test_condition_t test_condition);
+
+static const char *
+test_repeated_ascii_char(const char *repeat_startp,
+ const char *stringp,
+ const char *regexp); /* retry the tail, backing up one byte at a time */
+
+static const char *
+test_repeated_multibyte_char(const char *repeat_startp,
+ const char *stringp,
+ const char *regexp); /* retry the tail, backing up one character */
+
+static const char *
+test_repeated_group(const char *repeat_startp,
+ const char *stringp,
+ const char *regexp); /* retry the tail from popped positions */
+
+static const char *
+test_string(const char *stringp,
+ const char *regexp); /* match at stringp; returns end of match or null */
+
+
+/* DEFINITIONS OF PUBLIC VARIABLES */
+
+char *__loc1;
+
+/*
+ * Returns the address of the __loc1 slot the calling thread should use.
+ *
+ * When _thr_main() reports true, the global __loc1 itself is used.
+ * Otherwise the slot is obtained from tsdalloc() under the key
+ * _T_REGEX_LOC1 (presumably per-thread storage -- see tsd.h).  That
+ * call's result is returned as is, so it may be null; regex() treats
+ * a null result as failure.
+ */
+char **
+____loc1(void)
+{
+	char **loc1p;
+
+	if (_thr_main()) {
+		loc1p = &__loc1;
+	} else {
+		loc1p = (char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *),
+		    NULL);
+	}
+	return (loc1p);
+}
+
+#define __loc1 (*(____loc1()))
+
+/*
+ * DEFINITION OF regex()
+ *
+ * Matches the string addressed by stringp against the compiled regular
+ * expression addressed by regexp (the encoding is shared with regcmp(),
+ * see regex.h).
+ *
+ * Returns a pointer to the character following the matched substring,
+ * or null when there is no match, when either argument is null, or
+ * when ____loc1() cannot supply a slot for __loc1.
+ *
+ * Side effects:
+ * - __loc1 is cleared on entry and set to the start of the match.  For
+ *   an expression that begins with START_OF_STRING_MARK it is set to
+ *   stringp before matching and is left that way even if the match
+ *   fails.
+ * - NSUBSTRINGS "char *" varargs are always fetched with va_arg().
+ *   Each saved substring is copied, NUL-terminated, into the vararg
+ *   selected by return_arg_number[]; the destination size is not
+ *   checked.
+ * - regex_lock is held from just after the ____loc1() check until
+ *   return, because the match state lives in file-scope statics.
+ */
+
+extern char *
+_regex(const char *regexp,
+ const char *stringp, ...)
+{
+ va_list arg_listp;
+ int char_size;
+ const char *end_of_matchp;
+ wchar_t regex_wchar;
+ char *return_argp[NSUBSTRINGS];
+ char *returned_substringp;
+ int substringn;
+ const char *substringp;
+ wchar_t string_wchar;
+
+ if (____loc1() == (char **)0) { /* no __loc1 slot for this thread */
+ return ((char *)0);
+ } else {
+ lmutex_lock(&regex_lock);
+ __loc1 = (char *)0;
+ }
+
+ if ((stringp == (char *)0) || (regexp == (char *)0)) {
+ lmutex_unlock(&regex_lock);
+ return ((char *)0);
+ }
+
+
+ /*
+  * INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS
+  *
+  * All NSUBSTRINGS pointers are read unconditionally.
+  * substring_endp[] is not reset here; it is written together with
+  * return_arg_number[] when a saved group ends.
+  */
+
+ substringn = 0;
+ va_start(arg_listp, stringp);
+ while (substringn < NSUBSTRINGS) {
+ return_argp[substringn] = va_arg(arg_listp, char *);
+ substring_startp[substringn] = (char *)0;
+ return_arg_number[substringn] = -1;
+ substringn++;
+ }
+ va_end(arg_listp);
+
+
+ /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
+
+ end_of_matchp = (char *)0;
+ stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; /* empty stack */
+
+ if ((int)*regexp == (int)START_OF_STRING_MARK) {
+
+ /*
+ * the match must start at the beginning of the string;
+ * __loc1 is set before the test and is not cleared if it fails
+ */
+
+ __loc1 = (char *)stringp;
+ regexp++;
+ end_of_matchp = test_string(stringp, regexp);
+
+ } else if ((int)*regexp == (int)ASCII_CHAR) {
+
+ /*
+ * test a string against a regular expression
+ * that starts with a single ASCII character:
+ *
+ * move to each character in the string that matches
+ * the first character in the regular expression
+ * and test the remaining string
+ *
+ * (*(regexp + 1) is the literal character that follows the
+ * ASCII_CHAR marker)
+ */
+
+ while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
+ stringp++;
+ }
+ while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
+ end_of_matchp = test_string(stringp, regexp);
+ if (end_of_matchp != (char *)0) {
+ __loc1 = (char *)stringp;
+ } else {
+ stringp++;
+ while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
+ stringp++;
+ }
+ }
+ }
+
+ } else if (!multibyte) {
+
+ /*
+ * if the value of the "multibyte" macro defined in <euc.h>
+ * is false, regex() is running in an ASCII locale;
+ * test an ASCII string against an ASCII regular expression
+ * that doesn't start with a single ASCII character:
+ *
+ * move forward in the string one byte at a time, testing
+ * the remaining string against the regular expression
+ *
+ * (the empty string at the terminating '\0' is tested too)
+ */
+
+ end_of_matchp = test_string(stringp, regexp);
+ while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
+ stringp++;
+ end_of_matchp = test_string(stringp, regexp);
+ }
+ if (end_of_matchp != (char *)0) {
+ __loc1 = (char *)stringp;
+ }
+
+ } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
+
+ /*
+ * test a multibyte string against a multibyte regular expression
+ * that starts with a single multibyte character:
+ *
+ * move to each character in the string that matches
+ * the first character in the regular expression
+ * and test the remaining string
+ *
+ * (scanning stops as soon as get_wchar() reports a size that
+ * is not positive, i.e. at end of string or on a decode error)
+ */
+
+ (void) get_wchar(&regex_wchar, regexp + 1);
+ char_size = get_wchar(&string_wchar, stringp);
+ while ((string_wchar != regex_wchar) && (char_size > 0)) {
+ stringp += char_size;
+ char_size = get_wchar(&string_wchar, stringp);
+ }
+ while ((end_of_matchp == (char *)0) && (char_size > 0)) {
+ end_of_matchp = test_string(stringp, regexp);
+ if (end_of_matchp != (char *)0) {
+ __loc1 = (char *)stringp;
+ } else {
+ stringp += char_size;
+ char_size = get_wchar(&string_wchar, stringp);
+ while ((string_wchar != regex_wchar) && (char_size > 0)) {
+ stringp += char_size;
+ char_size = get_wchar(&string_wchar, stringp);
+ }
+ }
+ }
+
+ } else {
+
+ /*
+ * test a multibyte string against a multibyte regular expression
+ * that doesn't start with a single multibyte character
+ *
+ * move forward in the string one multibyte character at a time,
+ * testing the remaining string against the regular expression
+ */
+
+ end_of_matchp = test_string(stringp, regexp);
+ char_size = get_wchar(&string_wchar, stringp);
+ while ((end_of_matchp == (char *)0) && (char_size > 0)) {
+ stringp += char_size;
+ end_of_matchp = test_string(stringp, regexp);
+ char_size = get_wchar(&string_wchar, stringp);
+ }
+ if (end_of_matchp != (char *)0) {
+ __loc1 = (char *)stringp;
+ }
+ }
+
+ /*
+ * Return substrings that matched subexpressions for which
+ * matching substrings are to be returned.
+ *
+ * NOTE:
+ *
+ * According to manual page regcmp(3G), regex() returns substrings
+ * that match subexpressions even when no substring matches the
+ * entire regular expression.
+ *
+ * A substring is copied only when its start was recorded, a return
+ * argument number was assigned, and the caller's pointer for that
+ * argument is not null.  The copy is NUL-terminated.
+ */
+
+ substringn = 0;
+ while (substringn < NSUBSTRINGS) {
+ substringp = substring_startp[substringn];
+ if ((substringp != (char *)0) &&
+ (return_arg_number[substringn] >= 0)) {
+ returned_substringp =
+ return_argp[return_arg_number[substringn]];
+ if (returned_substringp != (char *)0) {
+ while (substringp < substring_endp[substringn]) {
+ *returned_substringp = (char)*substringp;
+ returned_substringp++;
+ substringp++;
+ }
+ *returned_substringp = '\0';
+ }
+ }
+ substringn++;
+ }
+ lmutex_unlock(&regex_lock);
+ return ((char *)end_of_matchp);
+} /* regex() */
+
+
+/* DEFINITIONS OF PRIVATE FUNCTIONS */
+
+/*
+ * Decodes the character at the start of stringp into *wcharp and
+ * returns the number of bytes that character occupies:
+ *
+ *	0	stringp is null or addresses the terminating '\0';
+ *		*wcharp is set to 0
+ *	1	the byte is 7-bit ASCII; *wcharp is that byte
+ *	other	whatever mbtowc() returns for a byte above 0x7f
+ *		(negative when the sequence cannot be decoded)
+ *
+ * When mbtowc() fails, *wcharp is set to 0.  Some callers compare the
+ * decoded character before they look at the returned size, so the
+ * output must never be left unset.
+ */
+static int
+get_wchar(wchar_t *wcharp,
+	const char *stringp)
+{
+	int char_size;
+
+	if ((stringp == (char *)0) || (*stringp == '\0')) {
+		*wcharp = (wchar_t)0;
+		return (0);
+	}
+	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
+		*wcharp = (wchar_t)((unsigned int)*stringp);
+		return (1);
+	}
+
+	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
+	if (char_size < 0) {
+		/* undecodable sequence: give the caller a defined value */
+		*wcharp = (wchar_t)0;
+	}
+	return (char_size);
+}
+
+/*
+ * Decodes the two count bytes of a counted repetition,
+ * <minimum_match_count><maximum_match_count>, addressed by
+ * count_stringp.
+ *
+ * *nmust_matchp receives the minimum count.
+ * *nextra_matches_allowedp receives UNLIMITED when the maximum byte is
+ * UNLIMITED, otherwise the maximum count minus the minimum count.
+ */
+static void
+get_match_counts(int *nmust_matchp,
+	int *nextra_matches_allowedp,
+	const char *count_stringp)
+{
+	int min_count =
+	    (int)((unsigned int)count_stringp[0] & SINGLE_BYTE_MASK);
+	int max_count =
+	    (int)((unsigned int)count_stringp[1] & SINGLE_BYTE_MASK);
+
+	*nmust_matchp = min_count;
+	*nextra_matches_allowedp = (max_count == (int)UNLIMITED) ?
+	    (int)UNLIMITED : (max_count - min_count);
+} /* get_match_counts() */
+
+/*
+ * Reports whether test_char lies in the inclusive range
+ * lower_char..upper_char.
+ *
+ * Besides the ordering test, either both bounds must be at most 0x7f,
+ * or test_char must agree with both bounds in the bits selected by
+ * WCHAR_CSMASK (presumably the EUC codeset bits of the wide character).
+ */
+static boolean_t
+in_wchar_range(wchar_t test_char,
+	wchar_t lower_char,
+	wchar_t upper_char)
+{
+	int both_ascii;
+	int same_codeset;
+
+	if ((test_char < lower_char) || (upper_char < test_char)) {
+		return (0);
+	}
+
+	both_ascii = (lower_char <= 0x7f) && (upper_char <= 0x7f);
+	same_codeset =
+	    ((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
+	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK));
+
+	return (both_ascii || same_codeset);
+
+} /* in_wchar_range() */
+
+/*
+ * Pops the most recently pushed string position off stringp_stack[].
+ * Returns null when the stack is empty, i.e. when stringp_stackp is at
+ * or beyond the end of the array.
+ */
+static const char *
+pop_stringp(void)
+{
+	if (stringp_stackp < &stringp_stack[STRINGP_STACK_SIZE]) {
+		return (*stringp_stackp++);
+	}
+	return ((char *)0);
+}
+
+
+/*
+ * Returns a pointer to the character that precedes current_charp in a
+ * string of multibyte characters.
+ *
+ * The candidates are tried in this order:
+ *   1. the preceding byte, if it is 7-bit ASCII;
+ *   2. the byte eucw2 + 1 back, if it is SS2;
+ *   3. the byte eucw3 + 1 back, if it is SS3;
+ *   4. otherwise the position eucw1 bytes back.
+ * (eucw1, eucw2 and eucw3 are presumably the byte widths of EUC
+ * codesets 1-3 -- see <euc.h>.)
+ *
+ * NOTE(review): bytes before current_charp are read without knowing
+ * where the string starts; callers must only step back over characters
+ * they have already scanned forward.
+ */
+static const char *
+previous_charp(const char *current_charp)
+{
+	const char *candidatep;
+
+	candidatep = current_charp - 1;
+	if ((unsigned char)*candidatep <= 0x7f) {
+		return (candidatep);
+	}
+
+	candidatep = current_charp - eucw2 - 1;
+	if ((unsigned char)*candidatep == SS2) {
+		return (candidatep);
+	}
+
+	candidatep = current_charp - eucw3 - 1;
+	if ((unsigned char)*candidatep == SS3) {
+		return (candidatep);
+	}
+
+	return (current_charp - eucw1);
+
+} /* previous_charp() */
+
+/*
+ * Pushes stringp onto stringp_stack[], which grows downward.
+ * Returns stringp on success, or null when the stack is already full
+ * (stringp_stackp has reached the first slot of the array).
+ */
+static const char *
+push_stringp(const char *stringp)
+{
+	if (stringp_stackp > &stringp_stack[0]) {
+		*--stringp_stackp = stringp;
+		return (stringp);
+	}
+	return ((char *)0);
+}
+
+
+/*
+ * Tests test_char for membership in an ASCII character class compiled
+ * by the internationalized version of regcmp().
+ *
+ * classp addresses the <class_length> byte; the length includes that
+ * byte itself.  A range a-z is encoded as aTHRUz, so when the THRU
+ * marker is found the bounds are the bytes on either side of it.
+ *
+ * Returns CONDITION_TRUE when the membership result agrees with
+ * test_condition (IN_CLASS or NOT_IN_CLASS), else CONDITION_FALSE.
+ */
+static char_test_result_t
+test_char_against_ascii_class(char test_char,
+	const char *classp,
+	char_test_condition_t test_condition)
+{
+	const char_test_result_t on_match =
+	    (test_condition == IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char_test_result_t on_miss =
+	    (test_condition == NOT_IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char *memberp = classp + 1;
+	int remaining = (int)*classp - 1;
+
+	while (remaining > 0) {
+		if (test_char == *memberp) {
+			return (on_match);
+		}
+		if (*memberp != THRU) {
+			/* ordinary member that did not match */
+			memberp++;
+			remaining--;
+			continue;
+		}
+		if ((memberp[-1] <= test_char) && (test_char <= memberp[1])) {
+			return (on_match);
+		}
+		/* skip the THRU marker and the upper bound */
+		memberp += 2;
+		remaining -= 2;
+	}
+	return (on_miss);
+} /* test_char_against_ascii_class() */
+
+/*
+ * Tests test_char for membership in a multibyte character class.
+ *
+ * classp addresses the <class_length> byte; the length includes that
+ * byte itself.  A range a-z is encoded as aTHRUz: when a THRU marker
+ * is decoded, the previously decoded member is the lower bound and the
+ * next member is the upper bound (checked with in_wchar_range()).
+ * The first member is compared directly and is never treated as a
+ * THRU marker.
+ *
+ * Returns CONDITION_TRUE when the membership result agrees with
+ * test_condition, CONDITION_FALSE when it does not, and
+ * CHAR_TEST_ERROR when get_wchar() cannot decode a class member.
+ */
+static char_test_result_t
+test_char_against_multibyte_class(wchar_t test_char,
+	const char *classp,
+	char_test_condition_t test_condition)
+{
+	const char_test_result_t on_match =
+	    (test_condition == IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char_test_result_t on_miss =
+	    (test_condition == NOT_IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char *memberp = classp + 1;
+	int remaining = (int)*classp - 1;
+	int member_size;
+	wchar_t member;
+	wchar_t range_low;
+
+	/* first member: there is no lower bound yet, so no range test */
+	member_size = get_wchar(&member, memberp);
+	if (member_size <= 0) {
+		return (CHAR_TEST_ERROR);
+	}
+	if (test_char == member) {
+		return (on_match);
+	}
+	memberp += member_size;
+	remaining -= member_size;
+
+	while (remaining > 0) {
+		range_low = member;
+		member_size = get_wchar(&member, memberp);
+		if (member_size <= 0) {
+			return (CHAR_TEST_ERROR);
+		}
+		if (test_char == member) {
+			return (on_match);
+		}
+		if (member == THRU) {
+			/* step over the marker and decode the upper bound */
+			memberp += member_size;
+			remaining -= member_size;
+			member_size = get_wchar(&member, memberp);
+			if (member_size <= 0) {
+				return (CHAR_TEST_ERROR);
+			}
+			if (in_wchar_range(test_char, range_low, member)) {
+				return (on_match);
+			}
+		}
+		memberp += member_size;
+		remaining -= member_size;
+	}
+	return (on_miss);
+} /* test_char_against_multibyte_class() */
+
+
+/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
+
+/*
+ * Tests test_char for membership in an ASCII character class compiled
+ * by the ASCII version of regcmp().
+ *
+ * classp addresses the <class_length> byte; the length includes that
+ * byte itself.  The old encoding writes a range a-z as THRUaz (the
+ * internationalized encoding uses aTHRUz), so both bounds follow the
+ * THRU marker.
+ *
+ * Returns CONDITION_TRUE when the membership result agrees with
+ * test_condition (IN_CLASS or NOT_IN_CLASS), else CONDITION_FALSE.
+ */
+static char_test_result_t
+test_char_against_old_ascii_class(char test_char,
+	const char *classp,
+	char_test_condition_t test_condition)
+{
+	const char_test_result_t on_match =
+	    (test_condition == IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char_test_result_t on_miss =
+	    (test_condition == NOT_IN_CLASS) ? CONDITION_TRUE : CONDITION_FALSE;
+	const char *memberp = classp + 1;
+	int remaining = (int)*classp - 1;
+
+	while (remaining > 0) {
+		if (test_char == *memberp) {
+			return (on_match);
+		}
+		if (*memberp != THRU) {
+			/* ordinary member that did not match */
+			memberp++;
+			remaining--;
+			continue;
+		}
+		if ((memberp[1] <= test_char) && (test_char <= memberp[2])) {
+			return (on_match);
+		}
+		/* skip the THRU marker and both bounds */
+		memberp += 3;
+		remaining -= 3;
+	}
+	return (on_miss);
+} /* test_char_against_old_ascii_class() */
+
+/*
+ * Finishes a match after a repeated single-byte item has consumed the
+ * bytes from repeat_startp up to stringp.
+ *
+ * The rest of the expression (regexp) is tried at stringp; on failure
+ * one repetition is given back (stringp moves back one byte) and the
+ * test is repeated, down to and including repeat_startp.
+ *
+ * Returns what test_string() returned for the first position that
+ * matched, or null if none did.
+ */
+static const char *
+test_repeated_ascii_char(const char *repeat_startp,
+	const char *stringp,
+	const char *regexp)
+{
+	const char *matchedp;
+
+	for (;;) {
+		matchedp = test_string(stringp, regexp);
+		if ((matchedp != (char *)0) || (stringp <= repeat_startp)) {
+			return (matchedp);
+		}
+		stringp--;
+	}
+}
+
+/*
+ * Finishes a match after a repeated multibyte item has consumed the
+ * characters from repeat_startp up to stringp.
+ *
+ * The rest of the expression (regexp) is tried at stringp; on failure
+ * one repetition is given back (previous_charp() steps stringp back
+ * one character) and the test is repeated while stringp is still
+ * beyond repeat_startp.
+ *
+ * Returns what test_string() returned for the first position that
+ * matched, or null if none did.
+ */
+static const char *
+test_repeated_multibyte_char(const char *repeat_startp,
+	const char *stringp,
+	const char *regexp)
+{
+	const char *matchedp;
+
+	for (;;) {
+		matchedp = test_string(stringp, regexp);
+		if ((matchedp != (char *)0) || (stringp <= repeat_startp)) {
+			return (matchedp);
+		}
+		stringp = previous_charp(stringp);
+	}
+}
+
+/*
+ * Finishes a match after a repeated group has consumed text starting
+ * at repeat_startp.
+ *
+ * The rest of the expression (regexp) is tried at stringp; on failure,
+ * while stringp is still beyond repeat_startp, the next position to
+ * try is taken from pop_stringp().
+ *
+ * Returns what test_string() returned for the first position that
+ * matched, or null if none did or the position stack ran empty.
+ */
+static const char *
+test_repeated_group(const char *repeat_startp,
+	const char *stringp,
+	const char *regexp)
+{
+	const char *matchedp;
+
+	for (;;) {
+		matchedp = test_string(stringp, regexp);
+		if ((matchedp != (char *)0) || (stringp <= repeat_startp)) {
+			return (matchedp);
+		}
+		stringp = pop_stringp();
+		if (stringp == (char *)0) {
+			return ((char *)0);
+		}
+	}
+}
+
+static const char *
+test_string(const char *stringp,
+ const char *regexp)
+{
+ /*
+ * returns a pointer to the first character following the first
+ * substring of the string addressed by stringp that matches
+ * the compiled regular expression addressed by regexp
+ */
+
+ unsigned int group_length;
+ int nextra_matches_allowed;
+ int nmust_match;
+ wchar_t regex_wchar;
+ int regex_char_size;
+ const char *repeat_startp;
+ unsigned int return_argn;
+ wchar_t string_wchar;
+ int string_char_size;
+ unsigned int substringn;
+ char_test_condition_t test_condition;
+ const char *test_stringp;
+
+ for (;;) {
+
+ /*
+ * Exit the loop via a return whenever there's a match
+ * or it's clear that there can be no match.
+ */
+
+ switch ((int)*regexp) {
+
+ /*
+ * No fall-through.
+ * Each case ends with either a return or with stringp
+ * addressing the next character to be tested and regexp
+ * addressing the next compiled regular expression
+ *
+ * NOTE: The comments for each case give the meaning
+ * of the compiled regular expression decoded by the case
+ * and the character string that the compiled regular
+ * expression uses to encode the case. Each single
+ * character encoded in the compiled regular expression
+ * is shown enclosed in angle brackets (<>). Each
+ * compiled regular expression begins with a marker
+ * character which is shown as a named constant
+ * (e.g. <ASCII_CHAR>). Character constants are shown
+ * enclosed in single quotes (e.g. <'$'>). All other
+ * single characters encoded in the compiled regular
+ * expression are shown as lower case variable names
+ * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
+ * strings encoded in the compiled regular expression
+ * are shown as variable names followed by ellipses
+ * (e.g. <compiled_regex...>).
+ */
+
+ case ASCII_CHAR: /* single ASCII char */
+
+ /* encoded as <ASCII_CHAR><ascii_char> */
+
+ regexp++;
+ if (*regexp == *stringp) {
+ regexp++;
+ stringp++;
+ } else {
+ return ((char *)0);
+ }
+ break; /* end case ASCII_CHAR */
+
+ case MULTIBYTE_CHAR: /* single multibyte char */
+
+ /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
+
+ regexp++;
+ regex_char_size = get_wchar(&regex_wchar, regexp);
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
+ return ((char *)0);
+ } else {
+ regexp += regex_char_size;
+ stringp += string_char_size;
+ }
+ break; /* end case MULTIBYTE_CHAR */
+
+ case ANY_CHAR: /* any single ASCII or multibyte char */
+
+ /* encoded as <ANY_CHAR> */
+
+ if (!multibyte) {
+ if (*stringp == '\0') {
+ return ((char *)0);
+ } else {
+ regexp++;
+ stringp++;
+ }
+ } else {
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if (string_char_size <= 0) {
+ return ((char *)0);
+ } else {
+ regexp++;
+ stringp += string_char_size;
+ }
+ }
+ break; /* end case ANY_CHAR */
+
+ case IN_ASCII_CHAR_CLASS: /* [.....] */
+ case NOT_IN_ASCII_CHAR_CLASS:
+
+ /*
+ * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
+ * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ if ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ regexp += (int)*regexp; /* add the class length to regexp */
+ stringp++;
+ } else {
+ return ((char *)0);
+ }
+ break; /* end case IN_ASCII_CHAR_CLASS */
+
+ case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
+ case NOT_IN_MULTIBYTE_CHAR_CLASS:
+
+ /*
+ * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
+ * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ regexp += (int)*regexp; /* add the class length to regexp */
+ stringp += string_char_size;
+ } else {
+ return ((char *)0);
+ }
+ break; /* end case IN_MULTIBYTE_CHAR_CLASS */
+
+ case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
+ case NOT_IN_OLD_ASCII_CHAR_CLASS:
+
+ /*
+ * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
+ * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ if ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ regexp += (int)*regexp; /* add the class length to regexp */
+ stringp++;
+ } else {
+ return ((char *)0);
+ }
+ break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
+
+ case SIMPLE_GROUP: /* (.....) */
+
+ /* encoded as <SIMPLE_GROUP><group_length> */
+
+ regexp += 2;
+ break; /* end case SIMPLE_GROUP */
+
+ case END_GROUP: /* (.....) */
+
+ /* encoded as <END_GROUP><groupn> */
+
+ regexp += 2;
+ break; /* end case END_GROUP */
+
+ case SAVED_GROUP: /* (.....)$0-9 */
+
+ /* encoded as <SAVED_GROUP><substringn> */
+
+ regexp++;
+ substringn = (unsigned int)*regexp;
+ if (substringn >= NSUBSTRINGS)
+ return ((char *)0);
+ substring_startp[substringn] = stringp;
+ regexp++;
+ break; /* end case SAVED_GROUP */
+
+ case END_SAVED_GROUP: /* (.....)$0-9 */
+
+ /*
+ * encoded as <END_SAVED_GROUP><substringn>\
+ * <return_arg_number[substringn]>
+ */
+
+ regexp++;
+ substringn = (unsigned int)*regexp;
+ if (substringn >= NSUBSTRINGS)
+ return ((char *)0);
+ substring_endp[substringn] = stringp;
+ regexp++;
+ return_argn = (unsigned int)*regexp;
+ if (return_argn >= NSUBSTRINGS)
+ return ((char *)0);
+ return_arg_number[substringn] = return_argn;
+ regexp++;
+ break; /* end case END_SAVED_GROUP */
+
+ case ASCII_CHAR|ZERO_OR_MORE: /* char* */
+
+ /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
+
+ regexp++;
+ repeat_startp = stringp;
+ while (*stringp == *regexp) {
+ stringp++;
+ }
+ regexp++;
+ return (test_repeated_ascii_char(repeat_startp,
+ stringp, regexp));
+
+ /* end case ASCII_CHAR|ZERO_OR_MORE */
+
+ case ASCII_CHAR|ONE_OR_MORE: /* char+ */
+
+ /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
+
+ regexp++;
+ if (*stringp != *regexp) {
+ return ((char *)0);
+ } else {
+ stringp++;
+ repeat_startp = stringp;
+ while (*stringp == *regexp) {
+ stringp++;
+ }
+ regexp++;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case ASCII_CHAR|ONE_OR_MORE */
+
+ case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
+
+ /*
+ * encoded as <ASCII_CHAR|COUNT><ascii_char>\
+ * <minimum_match_count><maximum_match_count>
+ */
+
+ regexp++;
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + 1);
+ while ((*stringp == *regexp) && (nmust_match > 0)) {
+ nmust_match--;
+ stringp++;
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while (*stringp == *regexp) {
+ stringp++;
+ }
+ regexp += 3;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((*stringp == *regexp) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp++;
+ }
+ regexp += 3;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case ASCII_CHAR|COUNT */
+
+ case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
+
+ /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
+
+ regexp++;
+ regex_char_size = get_wchar(&regex_wchar, regexp);
+ repeat_startp = stringp;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (string_wchar == regex_wchar)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += regex_char_size;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+
+ /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
+
+ case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
+
+ /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
+
+ regexp++;
+ regex_char_size = get_wchar(&regex_wchar, regexp);
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
+ return ((char *)0);
+ } else {
+ stringp += string_char_size;
+ repeat_startp = stringp;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (string_wchar == regex_wchar)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += regex_char_size;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
+
+ case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
+
+ /*
+ * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
+ * <minimum_match_count><maximum_match_count>
+ */
+
+ regexp++;
+ regex_char_size = get_wchar(&regex_wchar, regexp);
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + regex_char_size);
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (string_wchar == regex_wchar) &&
+ (nmust_match > 0)) {
+
+ nmust_match--;
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while ((string_char_size > 0) &&
+ (string_wchar == regex_wchar)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += regex_char_size + 2;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((string_char_size > 0) &&
+ (string_wchar == regex_wchar) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += regex_char_size + 2;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case MULTIBYTE_CHAR|COUNT */
+
+ case ANY_CHAR|ZERO_OR_MORE: /* .* */
+
+ /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
+
+ repeat_startp = stringp;
+ if (!multibyte) {
+ while (*stringp != '\0') {
+ stringp++;
+ }
+ regexp++;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while (string_char_size > 0) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp++;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case <ANY_CHAR|ZERO_OR_MORE> */
+
+ case ANY_CHAR|ONE_OR_MORE: /* .+ */
+
+ /* encoded as <ANY_CHAR|ONE_OR_MORE> */
+
+ if (!multibyte) {
+ if (*stringp == '\0') {
+ return ((char *)0);
+ } else {
+ stringp++;
+ repeat_startp = stringp;
+ while (*stringp != '\0') {
+ stringp++;
+ }
+ regexp++;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ } else {
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if (string_char_size <= 0) {
+ return ((char *)0);
+ } else {
+ stringp += string_char_size;
+ repeat_startp = stringp;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while (string_char_size > 0) {
+ stringp += string_char_size;
+ string_char_size =
+ get_wchar(&string_wchar, stringp);
+ }
+ regexp++;
+ return (test_repeated_multibyte_char(repeat_startp,
+ stringp, regexp));
+ }
+ }
+ /* end case <ANY_CHAR|ONE_OR_MORE> */
+
+ case ANY_CHAR|COUNT: /* .{min_count,max_count} */
+
+ /*
+ * encoded as <ANY_CHAR|COUNT>\
+ * <minimum_match_count><maximum_match_count>
+ */
+
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + 1);
+ if (!multibyte) {
+ while ((*stringp != '\0') && (nmust_match > 0)) {
+ nmust_match--;
+ stringp++;
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while (*stringp != '\0') {
+ stringp++;
+ }
+ regexp += 3;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp++;
+ }
+ regexp += 3;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ } else { /* multibyte character */
+
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) && (nmust_match > 0)) {
+ nmust_match--;
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while (string_char_size > 0) {
+ stringp += string_char_size;
+ string_char_size =
+ get_wchar(&string_wchar, stringp);
+ }
+ regexp += 3;
+ return (test_repeated_multibyte_char(repeat_startp,
+ stringp, regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((string_char_size > 0) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp += string_char_size;
+ string_char_size =
+ get_wchar(&string_wchar, stringp);
+ }
+ regexp += 3;
+ return (test_repeated_multibyte_char(repeat_startp,
+ stringp, regexp));
+ }
+ } /* end case ANY_CHAR|COUNT */
+
+ case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
+ case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
+
+ /*
+ * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+
+ /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
+
+ case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
+ case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
+
+ /*
+ * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ if ((*stringp == '\0') ||
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) != CONDITION_TRUE)) {
+ return ((char *)0);
+ } else {
+ stringp++;
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
+
+ case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
+ case NOT_IN_ASCII_CHAR_CLASS | COUNT:
+
+ /*
+ * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
+ * <class ...><minimum_match_count>\
+ * <maximum_match_count>
+ * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
+ * <class ...><minimum_match_count>\
+ * <maximum_match_count>
+ *
+ * NOTE: <class_length> includes the <class_length> byte,
+ * but not the <minimum_match_count> or
+ * <maximum_match_count> bytes
+ */
+
+ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + (int)*regexp);
+ while ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE) &&
+ (nmust_match > 0)) {
+ nmust_match--;
+ stringp++;
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp++;
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_ASCII_CHAR_CLASS|COUNT */
+
+ case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
+ case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
+
+ /*
+ * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp ==
+ (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ repeat_startp = stringp;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+
+ /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
+
+ case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
+ case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
+
+ /*
+ * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp ==
+ (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ string_char_size = get_wchar(&string_wchar, stringp);
+ if ((string_char_size <= 0) ||
+ (test_char_against_multibyte_class(string_wchar, regexp,
+ test_condition) != CONDITION_TRUE)) {
+ return ((char *)0);
+ } else {
+ stringp += string_char_size;
+ repeat_startp = stringp;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar,
+ regexp, test_condition) == CONDITION_TRUE)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
+
+ case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
+ case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
+
+ /*
+ * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
+ * <class_length><class ...><min_count><max_count>
+ * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
+ * <class_length><class ...><min_count><max_count>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ * but not the <minimum_match_count> or
+ * <maximum_match_count> bytes
+ */
+
+ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + (int)*regexp);
+ string_char_size = get_wchar(&string_wchar, stringp);
+ while ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar, regexp,
+ test_condition) == CONDITION_TRUE) &&
+ (nmust_match > 0)) {
+ nmust_match--;
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar,
+ regexp, test_condition) == CONDITION_TRUE)) {
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((string_char_size > 0) &&
+ (test_char_against_multibyte_class(string_wchar,
+ regexp, test_condition) == CONDITION_TRUE) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp += string_char_size;
+ string_char_size = get_wchar(&string_wchar, stringp);
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_multibyte_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
+
+ case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
+ case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
+
+ /*
+ * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp ==
+ (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+
+ /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
+
+ case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
+ case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
+
+ /*
+ * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
+ * <class_length><class ...>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ */
+
+ if ((int)*regexp ==
+ (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ if ((*stringp == '\0') ||
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) != CONDITION_TRUE)) {
+ return ((char *)0);
+ } else {
+ stringp++;
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp; /* add the class length to regexp */
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
+
+ case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
+ case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
+
+ /*
+ * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
+ * <class ...><minimum_match_count>\
+ * <maximum_match_count>
+ * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
+ * <class_length><class ...><minimum_match_count>\
+ * <maximum_match_count>
+ *
+ * NOTE: <class_length> includes the <class_length> byte
+ * but not the <minimum_match_count> or
+ * <maximum_match_count> bytes
+ */
+
+ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
+ test_condition = IN_CLASS;
+ } else {
+ test_condition = NOT_IN_CLASS;
+ }
+ regexp++; /* point to the <class_length> byte */
+
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + (int)*regexp);
+ while ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE) &&
+ (nmust_match > 0)) {
+ nmust_match--;
+ stringp++;
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE)) {
+ stringp++;
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((*stringp != '\0') &&
+ (test_char_against_old_ascii_class(*stringp, regexp,
+ test_condition) == CONDITION_TRUE) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ stringp++;
+ }
+ regexp += (int)*regexp + 2;
+ return (test_repeated_ascii_char(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
+
+ case ZERO_OR_MORE_GROUP: /* (.....)* */
+ case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
+ case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
+ case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
+
+ /*
+ * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
+ * <group_length><compiled_regex...>\
+ * <END_GROUP|ZERO_OR_MORE><groupn>
+ *
+ * NOTE:
+ *
+ * group_length + (256 * ADDED_LENGTH_BITS) ==
+ * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
+ * <groupn>)
+ *
+ */
+
+ group_length =
+ (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
+ TIMES_256_SHIFT);
+ regexp++;
+ group_length += (unsigned int)*regexp;
+ regexp++;
+ repeat_startp = stringp;
+ test_stringp = test_string(stringp, regexp);
+ while (test_stringp != (char *)0) {
+ if (push_stringp(stringp) == (char *)0)
+ return ((char *)0);
+ stringp = test_stringp;
+ test_stringp = test_string(stringp, regexp);
+ }
+ regexp += group_length;
+ return (test_repeated_group(repeat_startp, stringp, regexp));
+
+ /* end case ZERO_OR_MORE_GROUP */
+
+ case END_GROUP|ZERO_OR_MORE: /* (.....)* */
+
+ /* encoded as <END_GROUP|ZERO_OR_MORE><groupn> */
+
+ /* return from recursive call to test_string() */
+
+ return ((char *)stringp);
+
+ /* end case END_GROUP|ZERO_OR_MORE */
+
+ case ONE_OR_MORE_GROUP: /* (.....)+ */
+ case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
+ case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
+ case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
+
+ /*
+ * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
+ * <group_length><compiled_regex...>\
+ * <END_GROUP|ONE_OR_MORE><groupn>
+ *
+ * NOTE:
+ *
+ * group_length + (256 * ADDED_LENGTH_BITS) ==
+ * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
+ * <groupn>)
+ */
+
+ group_length =
+ (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
+ TIMES_256_SHIFT);
+ regexp++;
+ group_length += (unsigned int)*regexp;
+ regexp++;
+ stringp = test_string(stringp, regexp);
+ if (stringp == (char *)0)
+ return ((char *)0);
+ repeat_startp = stringp;
+ test_stringp = test_string(stringp, regexp);
+ while (test_stringp != (char *)0) {
+ if (push_stringp(stringp) == (char *)0)
+ return ((char *)0);
+ stringp = test_stringp;
+ test_stringp = test_string(stringp, regexp);
+ }
+ regexp += group_length;
+ return (test_repeated_group(repeat_startp, stringp, regexp));
+
+ /* end case ONE_OR_MORE_GROUP */
+
+ case END_GROUP|ONE_OR_MORE: /* (.....)+ */
+
+ /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
+
+ /* return from recursive call to test_string() */
+
+ return ((char *)stringp);
+
+ /* end case END_GROUP|ONE_OR_MORE */
+
+ case COUNTED_GROUP: /* (.....){min_count,max_count} */
+ case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
+ case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
+ case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
+
+ /*
+ * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
+ * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
+ * <minimum_match_count><maximum_match_count>
+ *
+ * NOTE:
+ *
+ * group_length + (256 * ADDED_LENGTH_BITS) ==
+ * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
+ *
+ * but does not include the <minimum_match_count> or
+ * <maximum_match_count> bytes
+ */
+
+ group_length =
+ (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
+ TIMES_256_SHIFT);
+ regexp++;
+ group_length += (unsigned int)*regexp;
+ regexp++;
+ get_match_counts(&nmust_match, &nextra_matches_allowed,
+ regexp + group_length);
+ test_stringp = test_string(stringp, regexp);
+ while ((test_stringp != (char *)0) && (nmust_match > 0)) {
+ stringp = test_stringp;
+ nmust_match--;
+ test_stringp = test_string(stringp, regexp);
+ }
+ if (nmust_match > 0) {
+ return ((char *)0);
+ } else if (nextra_matches_allowed == UNLIMITED) {
+ repeat_startp = stringp;
+ while (test_stringp != (char *)0) {
+ if (push_stringp(stringp) == (char *)0)
+ return ((char *)0);
+ stringp = test_stringp;
+ test_stringp = test_string(stringp, regexp);
+ }
+ regexp += group_length + 2;
+ return (test_repeated_group(repeat_startp, stringp,
+ regexp));
+ } else {
+ repeat_startp = stringp;
+ while ((test_stringp != (char *)0) &&
+ (nextra_matches_allowed > 0)) {
+ nextra_matches_allowed--;
+ if (push_stringp(stringp) == (char *)0)
+ return ((char *)0);
+ stringp = test_stringp;
+ test_stringp = test_string(stringp, regexp);
+ }
+ regexp += group_length + 2;
+ return (test_repeated_group(repeat_startp, stringp,
+ regexp));
+ }
+ /* end case COUNTED_GROUP */
+
+ case END_GROUP|COUNT: /* (.....){min_count,max_count} */
+
+ /* encoded as <END_GROUP|COUNT><groupn> */
+
+ /* return from recursive call to test_string() */
+
+ return (stringp);
+
+ /* end case END_GROUP|COUNT */
+
+ case END_OF_STRING_MARK:
+
+ /* encoded as <END_OF_STRING_MARK><END_REGEX> */
+
+ if (*stringp == '\0') {
+ regexp++;
+ } else {
+ return ((char *)0);
+ }
+ break; /* end case END_OF_STRING_MARK */
+
+ case END_REGEX: /* end of the compiled regular expression */
+
+ /* encoded as <END_REGEX> */
+
+ return (stringp);
+
+ /* end case END_REGEX */
+
+ default:
+
+ return ((char *)0);
+
+ } /* end switch (*regexp) */
+
+ } /* end for (;;) */
+
+} /* test_string() */