diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/lib/libc/port/regex/regcmp.c | |
download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/lib/libc/port/regex/regcmp.c')
-rw-r--r-- | usr/src/lib/libc/port/regex/regcmp.c | 1024 |
1 files changed, 1024 insertions, 0 deletions
diff --git a/usr/src/lib/libc/port/regex/regcmp.c b/usr/src/lib/libc/port/regex/regcmp.c new file mode 100644 index 0000000000..4533c207df --- /dev/null +++ b/usr/src/lib/libc/port/regex/regcmp.c @@ -0,0 +1,1024 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * IMPORTANT NOTE: + * + * regcmp() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. + * IT IS **NOT** CHARACTER SET INDEPENDENT. + * + */ + +#pragma weak regcmp = _regcmp + +#include "synonyms.h" +#include "mtlib.h" +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <thread.h> +#include <wctype.h> +#include <widec.h> +#include <string.h> +#include "tsd.h" + + +/* CONSTANTS SHARED WITH regex() */ + +#include "regex.h" + +/* PRIVATE CONSTANTS */ + +#define BACKSLASH '\\' +#define CIRCUMFLEX '^' +#define COMMA ',' +#define DASH '-' +#define DOLLAR_SIGN '$' +#define DOT '.' +#define LEFT_CURLY_BRACE '{' +#define LEFT_PAREN '(' +#define LEFT_SQUARE_BRACKET '[' +#define PLUS '+' +#define RIGHT_CURLY_BRACE '}' +#define RIGHT_PAREN ')' +#define RIGHT_SQUARE_BRACKET ']' +#define SINGLE_BYTE_MASK 0xff +#define STRINGP_STACK_SIZE 50 +#define STAR '*' + +/* PRIVATE GLOBAL VARIABLES */ + +static char *compilep_stack[STRINGP_STACK_SIZE]; +static char **compilep_stackp; +static mutex_t regcmp_lock = DEFAULTMUTEX; + +/* DECLARATIONS OF PRIVATE FUNCTIONS */ + +static int add_char(char *compilep, wchar_t wchar); +static int add_single_char_expr(char *compilep, wchar_t wchar); + +#define ERROR_EXIT(mutex_lockp, arg_listp, compile_startp) \ +\ + va_end(arg_listp); \ + lmutex_unlock(mutex_lockp); \ + if ((compile_startp) != (char *)0) \ + free((void *)compile_startp); \ + return ((char *)0) + +static int get_count(int *countp, const char *regexp); +static int get_digit(const char *regexp); +static int get_wchar(wchar_t *wchar, const char *regexp); +static char *pop_compilep(void); +static char *push_compilep(char *compilep); +static boolean_t valid_range(wchar_t lower_char, wchar_t upper_char); + + +/* DEFINITIONS OF PUBLIC VARIABLES */ + +int __i_size; + +/* + * define thread-specific storage for __i_size + * + */ +int * +___i_size(void) +{ + if (_thr_main()) + return (&__i_size); + return ((int *)tsdalloc(_T_REGCMP_ISIZE, sizeof (int), NULL)); +} + +#define __i_size (*(___i_size())) + +/* DEFINITION OF regcmp() */ + +extern char * +regcmp(const char *regexp, ...) +{ + va_list arg_listp; + size_t arg_strlen; + boolean_t can_repeat; + int char_size; + unsigned int class_length; + char *compilep; + char *compile_startp = (char *)0; + int count_length; + wchar_t current_char; + int expr_length; + int groupn; + unsigned int group_length; + unsigned int high_bits; + boolean_t dash_indicates_range; + unsigned int low_bits; + int max_count; + int min_count; + const char *next_argp; + wchar_t first_char_in_range; + char *regex_typep; + int return_arg_number; + int substringn; + + if (___i_size() == (int *)0) + return ((char *)0); + + /* + * When compiling a regular expression, regcmp() generates at most + * two extra single-byte characters for each character in the + * expression, so allocating three times the number of bytes in all + * the strings that comprise the regular expression will ensure that + * regcmp() won't overwrite the end of the allocated block when + * compiling the expression. + */ + + va_start(arg_listp, regexp); + next_argp = regexp; + arg_strlen = 0; + while (next_argp != (char *)0) { + arg_strlen += strlen(next_argp); + next_argp = va_arg(arg_listp, /* const */ char *); + } + va_end(arg_listp); + + if (arg_strlen == 0) + return ((char *)0); + compile_startp = (char *)malloc(3 * arg_strlen); + if (compile_startp == (char *)0) + return ((char *)0); + + lmutex_lock(®cmp_lock); + __i_size = 0; + compilep = compile_startp; + compilep_stackp = &compilep_stack[STRINGP_STACK_SIZE]; + + /* GET THE FIRST CHARACTER IN THE REGULAR EXPRESSION */ + va_start(arg_listp, regexp); + next_argp = va_arg(arg_listp, /* const */ char *); + char_size = get_wchar(¤t_char, regexp); + if (char_size < 0) { + ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); + } else if (char_size > 0) { + regexp += char_size; + } else /* (char_size == 0 ) */ { + regexp = next_argp; + next_argp = va_arg(arg_listp, /* const */ char *); + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); + } else { + regexp += char_size; + } + } + + /* FIND OUT IF THE EXPRESSION MUST START AT THE START OF A STRING */ + + if (current_char == CIRCUMFLEX) { + char_size = get_wchar(¤t_char, regexp); + if (char_size < 0) { + ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); + } else if (char_size > 0) { + regexp += char_size; + *compilep = (unsigned char)START_OF_STRING_MARK; + compilep++; + } else if /* (char_size == 0) && */ (next_argp != (char *)0) { + regexp = next_argp; + next_argp = va_arg(arg_listp, /* const */ char *); + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + regexp += char_size; + } + *compilep = (unsigned char)START_OF_STRING_MARK; + compilep++; + } else { + /* ((char_size==0) && (next_argp==(char *)0)) */ + /* + * the regular expression is "^" + */ + *compilep = (unsigned char)START_OF_STRING_MARK; + compilep++; + *compilep = (unsigned char)END_REGEX; + compilep++; + *compilep = '\0'; + compilep++; + __i_size = (int)(compilep - compile_startp); + va_end(arg_listp); + lmutex_unlock(®cmp_lock); + return (compile_startp); + } + } + + /* COMPILE THE REGULAR EXPRESSION */ + + groupn = 0; + substringn = 0; + can_repeat = B_FALSE; + for (;;) { + + /* + * At the end of each iteration get the next character + * from the regular expression and increment regexp to + * point to the following character. Exit when all + * the characters in all the strings in the argument + * list have been read. + */ + + switch (current_char) { + + /* + * No fall-through. Each case ends with either + * a break or an error exit. Each case starts + * with compilep addressing the next location to + * be written in the compiled regular expression, + * and with regexp addressing the next character + * to be read from the regular expression being + * compiled. Each case that doesn't return + * increments regexp to address the next character + * to be read from the regular expression and + * increments compilep to address the next + * location to be written in the compiled + * regular expression. + * + * NOTE: The comments for each case give the meaning + * of the regular expression compiled by the case + * and the character string written to the compiled + * regular expression by the case. Each single + * character + * written to the compiled regular expression is + * shown enclosed in angle brackets (<>). Each + * compiled regular expression begins with a marker + * character which is shown as a named constant + * (e.g. <ASCII_CHAR>). Character constants are + * shown enclosed in single quotes (e.g. <'$'>). + * All other single characters written to the + * compiled regular expression are shown as lower + * case variable names (e.g. <ascii_char> or + * <multibyte_char>). Multicharacter + * strings written to the compiled regular expression + * are shown as variable names followed by elipses + * (e.g. <regex...>). + */ + + case DOLLAR_SIGN: + /* end of string marker or simple dollar sign */ + /* compiles to <END_OF_STRING_MARK> or */ + /* <ASCII_CHAR><'$'> */ + + char_size = get_wchar(¤t_char, regexp); + if ((char_size == 0) && (next_argp == (char *)0)) { + can_repeat = B_FALSE; + *compilep = (unsigned char)END_OF_STRING_MARK; + compilep++; + } else { + can_repeat = B_TRUE; + *compilep = (unsigned char)ASCII_CHAR; + regex_typep = compilep; + compilep++; + *compilep = DOLLAR_SIGN; + compilep++; + } + break; /* end case DOLLAR_SIGN */ + + case DOT: /* any character */ + + /* compiles to <ANY_CHAR> */ + + can_repeat = B_TRUE; + *compilep = (unsigned char)ANY_CHAR; + regex_typep = compilep; + compilep++; + + break; /* end case DOT */ + + case BACKSLASH: /* escaped character */ + + /* + * compiles to <ASCII_CHAR><ascii_char> or + * <MULTIBYTE_CHAR><multibyte_char> + */ + + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + regexp += char_size; + can_repeat = B_TRUE; + expr_length = add_single_char_expr( + compilep, current_char); + regex_typep = compilep; + compilep += expr_length; + } + break; /* end case '\\' */ + + case LEFT_SQUARE_BRACKET: + /* start of a character class expression */ + + /* + * [^...c...] compiles to + * <NOT_IN_CLASS><class_length><...c...> + * [^...a-z...] compiles to + * <NOT_IN_CLASS><class_length><...a<THRU>z...> + * [...c...] compiles to + * <IN_CLASS><class_length><...c...> + * [...a-z...] compiles to + * <IN_CLASS><class_length><...a<THRU>z...> + * + * NOTE: <class_length> includes the + * <class_length> byte + */ + + can_repeat = B_TRUE; + regex_typep = compilep; + + /* DETERMINE THE CLASS TYPE */ + + /* + * NOTE: This algorithm checks the value of the + * "multibyte" + * macro in <euc.h> (included in <widec.h> ) + * to find out if regcmp() + * is compiling the regular expression in a + * multibyte locale. + */ + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else if (current_char == CIRCUMFLEX) { + regexp++; + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } else { + regexp += char_size; + if (!multibyte) { + *compilep = (unsigned char) + NOT_IN_ASCII_CHAR_CLASS; + } else { + *compilep = (unsigned char) + NOT_IN_MULTIBYTE_CHAR_CLASS; + } + /* leave space for <class_length> */ + compilep += 2; + } + } else { + regexp += char_size; + if (!multibyte) { + *compilep = (unsigned char) + IN_ASCII_CHAR_CLASS; + } else { + *compilep = (unsigned char) + IN_MULTIBYTE_CHAR_CLASS; + } + /* leave space for <class_length> */ + compilep += 2; + } + + /* COMPILE THE CLASS */ + /* + * check for a leading right square bracket, + * which is allowed + */ + + if (current_char == RIGHT_SQUARE_BRACKET) { + /* + * the leading RIGHT_SQUARE_BRACKET may + * be part of a character range + * expression like "[]-\]" + */ + dash_indicates_range = B_TRUE; + first_char_in_range = current_char; + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } else { + regexp += char_size; + *compilep = RIGHT_SQUARE_BRACKET; + compilep++; + } + } else { + /* + * decode the character in the following + * while loop and decide then if it can + * be the first character + * in a character range expression + */ + dash_indicates_range = B_FALSE; + } + + while (current_char != RIGHT_SQUARE_BRACKET) { + if (current_char != DASH) { + /* + * if a DASH follows current_char, + * current_char, the DASH and the + * character that follows the DASH + * may form a character range + * expression + */ + dash_indicates_range = B_TRUE; + first_char_in_range = current_char; + expr_length = add_char( + compilep, current_char); + compilep += expr_length; + + } else if /* (current_char == DASH) && */ + (dash_indicates_range == B_FALSE) { + /* + * current_char is a DASH, but + * either begins the entire + * character class or follows a + * character that's already + * part of a character range + * expression, so it simply + * represents the DASH character + * itself + */ + *compilep = DASH; + compilep ++; + /* + * if another DASH follows this + * one, this DASH is part + * of a character range expression + * like "[--\]" + */ + dash_indicates_range = B_TRUE; + first_char_in_range = current_char; + + } else /* ((current_char == DASH && */ + /* (dash_indicates_range == B_TRUE)) */ { + /* + * the DASH appears after a single + * character that isn't + * already part of a character + * range expression, so it + * and the characters preceding + * and following it can form a + * character range expression + * like "[a-z]" + */ + char_size = get_wchar( + ¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + + } else if (current_char == + RIGHT_SQUARE_BRACKET) { + /* + * the preceding DASH is + * the last character in the + * class and represents the + * DASH character itself + */ + *compilep = DASH; + compilep++; + + } else if (valid_range( + first_char_in_range, + current_char) == B_FALSE) { + + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + + } else { + /* + * the DASH is part of a + * character range + * expression; encode the + * rest of the expression + */ + regexp += char_size; + *compilep = (unsigned char) + THRU; + compilep++; + expr_length = add_char( + compilep, current_char); + compilep += expr_length; + /* + * if a DASH follows this + * character range + * expression, + * it represents the DASH + * character itself + */ + dash_indicates_range = + B_FALSE; + } + } + + /* GET THE NEXT CHARACTER */ + + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } else { + regexp += char_size; + } + + } + /* end while (current_char != RIGHT_SQUARE_BRACKET) */ + + /* INSERT THE LENGTH OF THE CLASS INTO THE */ + /* COMPILED EXPRESSION */ + + class_length = (unsigned int) + (compilep - regex_typep - 1); + if ((class_length < 2) || + (class_length > MAX_SINGLE_BYTE_INT)) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + *(regex_typep + 1) = (unsigned char) + class_length; + } + break; /* end case LEFT_SQUARE_BRACKET */ + + case LEFT_PAREN: + + /* + * start of a parenthesized group of regular + * expressions compiles to <'\0'><'\0'>, leaving + * space in the compiled regular expression for + * <group_type|ADDED_LENGTH_BITS><group_length> + */ + + if (push_compilep(compilep) == (char *)0) { + /* + * groups can contain groups, so group + * start pointers + * must be saved and restored in sequence + */ + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + can_repeat = B_FALSE; + *compilep = '\0'; /* for debugging */ + compilep++; + *compilep = '\0'; /* for debugging */ + compilep++; + } + break; /* end case LEFT_PAREN */ + + case RIGHT_PAREN: + /* end of a marked group of regular expressions */ + + /* + * (<regex>)$0-9 compiles to + * <SAVED_GROUP><substringn><compiled_regex...>\ + * <END_SAVED_GROUP><substringn><return_arg_number> + * (<regex>)* compiles to + * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS> + * <group_length> <compiled_regex...> + * <END_GROUP|ZERO_OR_MORE><groupn> + * (<regex>)+ compiles to + * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS> + * <group_length>\ + * <compiled_regex...><END_GROUP|ONE_OR_MORE> + * <groupn> + * (<regex>){...} compiles to + * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ + * <compiled_regex...><END_GROUP|COUNT><groupn>\ + * <minimum_repeat_count><maximum_repeat_count> + * otherwise (<regex>) compiles to + * <SIMPLE_GROUP><blank><compiled_regex...> + * <END_GROUP><groupn> + * + * NOTE: + * + * group_length + (256 * ADDED_LENGTH_BITS) == + * length_of(<compiled_regex...><END_GROUP|...> + * <groupn>) + * which also == + * length_of(<group_type|ADDED_LENGTH_BITS> + * <group_length>\ <compiled_regex...>) + * groupn no longer seems to be used, but the code + * still computes it to preserve backward + * compatibility + * with earlier versions of regex(). + */ + + /* RETRIEVE THE ADDRESS OF THE START OF THE GROUP */ + + regex_typep = pop_compilep(); + if (regex_typep == (char *)0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + char_size = get_wchar(¤t_char, regexp); + if (char_size < 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else if (char_size == 0) { + *regex_typep = SIMPLE_GROUP; + can_repeat = B_TRUE; + *compilep = (unsigned char)END_GROUP; + regex_typep = compilep; + compilep++; + *compilep = (unsigned char)groupn; + groupn++; + compilep++; + } else if (current_char == DOLLAR_SIGN) { + *regex_typep = SAVED_GROUP; + regex_typep++; + *regex_typep = (char)substringn; + can_repeat = B_FALSE; + regexp ++; + return_arg_number = get_digit(regexp); + if ((return_arg_number < 0) || + (substringn >= NSUBSTRINGS)) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + regexp++; + *compilep = (unsigned char)END_SAVED_GROUP; + compilep++; + *compilep = (unsigned char)substringn; + substringn++; + compilep++; + *compilep = (unsigned char)return_arg_number; + compilep++; + } else { + switch (current_char) { + case STAR: + *regex_typep = ZERO_OR_MORE_GROUP; + break; + case PLUS: + *regex_typep = ONE_OR_MORE_GROUP; + break; + case LEFT_CURLY_BRACE: + *regex_typep = COUNTED_GROUP; + break; + default: + *regex_typep = SIMPLE_GROUP; + } + if (*regex_typep != SIMPLE_GROUP) { + group_length = (unsigned int) + (compilep - regex_typep); + if (group_length >= 1024) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } + high_bits = group_length >> + TIMES_256_SHIFT; + low_bits = group_length & + SINGLE_BYTE_MASK; + *regex_typep = + (unsigned char) + ((unsigned int) + *regex_typep | high_bits); + regex_typep++; + *regex_typep = + (unsigned char)low_bits; + } + can_repeat = B_TRUE; + *compilep = (unsigned char)END_GROUP; + regex_typep = compilep; + compilep++; + *compilep = (unsigned char)groupn; + groupn++; + compilep++; + } + + break; /* end case RIGHT_PAREN */ + + case STAR: /* zero or more repetitions of the */ + /* preceding expression */ + + /* + * <regex...>* compiles to <regex_type|ZERO_OR_MORE>\ + * <compiled_regex...> + * (<regex...>)* compiles to + * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ + * <group_length><compiled_regex...>\ + * <END_GROUP|ZERO_OR_MORE><groupn> + */ + + if (can_repeat == B_FALSE) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + can_repeat = B_FALSE; + *regex_typep = (unsigned char) + ((unsigned int)*regex_typep | ZERO_OR_MORE); + } + break; /* end case '*' */ + + case PLUS: + /* one or more repetitions of the preceding */ + /* expression */ + + /* + * <regex...>+ compiles to <regex_type|ONE_OR_MORE>\ + * <compiled_regex...> (<regex...>)+ compiles to + * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ + * <group_length><compiled_regex...>\ + * <END_GROUP|ONE_OR_MORE><groupn> + */ + + if (can_repeat == B_FALSE) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + can_repeat = B_FALSE; + *regex_typep = + (unsigned char)((unsigned int)* + regex_typep | ONE_OR_MORE); + } + break; /* end case '+' */ + + case LEFT_CURLY_BRACE: + + /* + * repeat the preceding regular expression + * at least min_count times + * and at most max_count times + * + * <regex...>{min_count} compiles to + * <regex type|COUNT><compiled_regex...> + * <min_count><min_count> + * + * <regex...>{min_count,} compiles to + * <regex type|COUNT><compiled_regex...> + * <min_count><UNLIMITED> + * + * <regex...>{min_count,max_count} compiles to + * <regex type>|COUNT><compiled_regex...> + * <min_count><max_count> + * + * (<regex...>){min_count,max_count} compiles to + * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ + * <compiled_regex...><END_GROUP|COUNT><groupn>\ + * <minimum_match_count><maximum_match_count> + */ + + if (can_repeat == B_FALSE) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + can_repeat = B_FALSE; + *regex_typep = (unsigned char)((unsigned int)* + regex_typep | COUNT); + count_length = get_count(&min_count, regexp); + if (count_length <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + regexp += count_length; + + if (*regexp == RIGHT_CURLY_BRACE) { /* {min_count} */ + regexp++; + max_count = min_count; + } else if (*regexp == COMMA) { /* {min_count,..} */ + regexp++; + /* {min_count,} */ + if (*regexp == RIGHT_CURLY_BRACE) { + regexp++; + max_count = UNLIMITED; + } else { /* {min_count,max_count} */ + count_length = get_count( + &max_count, regexp); + if (count_length <= 0) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } + regexp += count_length; + if (*regexp != RIGHT_CURLY_BRACE) { + ERROR_EXIT(®cmp_lock, + arg_listp, compile_startp); + } + regexp++; + } + } else { /* invalid expression */ + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + + if ((min_count > MAX_SINGLE_BYTE_INT) || + ((max_count != UNLIMITED) && + (min_count > max_count))) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + *compilep = (unsigned char)min_count; + compilep++; + *compilep = (unsigned char)max_count; + compilep++; + } + break; /* end case LEFT_CURLY_BRACE */ + + default: /* a single non-special character */ + + /* + * compiles to <ASCII_CHAR><ascii_char> or + * <MULTIBYTE_CHAR><multibyte_char> + */ + + can_repeat = B_TRUE; + regex_typep = compilep; + expr_length = add_single_char_expr(compilep, + current_char); + compilep += expr_length; + + } /* end switch (current_char) */ + + /* GET THE NEXT CHARACTER FOR THE WHILE LOOP */ + + char_size = get_wchar(¤t_char, regexp); + if (char_size < 0) { + ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); + } else if (char_size > 0) { + regexp += char_size; + } else if /* (char_size == 0) && */ (next_argp != (char *)0) { + regexp = next_argp; + next_argp = va_arg(arg_listp, /* const */ char *); + char_size = get_wchar(¤t_char, regexp); + if (char_size <= 0) { + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } else { + regexp += char_size; + } + } else /* ((char_size == 0) && (next_argp == (char *)0)) */ { + if (pop_compilep() != (char *)0) { + /* unmatched parentheses */ + ERROR_EXIT(®cmp_lock, arg_listp, + compile_startp); + } + *compilep = (unsigned char)END_REGEX; + compilep++; + *compilep = '\0'; + compilep++; + __i_size = (int)(compilep - compile_startp); + va_end(arg_listp); + lmutex_unlock(®cmp_lock); + return (compile_startp); + } + } /* end for (;;) */ + +} /* regcmp() */ + + +/* DEFINITIONS OF PRIVATE FUNCTIONS */ + +static int +add_char(char *compilep, wchar_t wchar) +{ + int expr_length; + + if ((unsigned int)wchar <= (unsigned int)0x7f) { + *compilep = (unsigned char)wchar; + expr_length = 1; + } else { + expr_length = wctomb(compilep, wchar); + } + return (expr_length); +} + +static int +add_single_char_expr(char *compilep, wchar_t wchar) +{ + int expr_length = 0; + + if ((unsigned int)wchar <= (unsigned int)0x7f) { + *compilep = (unsigned char)ASCII_CHAR; + compilep++; + *compilep = (unsigned char)wchar; + expr_length += 2; + } else { + *compilep = (unsigned char)MULTIBYTE_CHAR; + compilep++; + expr_length++; + expr_length += wctomb(compilep, wchar); + } + return (expr_length); +} + +static int +get_count(int *countp, const char *regexp) +{ + char count_char = '0'; + int count = 0; + int count_length = 0; + + if (regexp == (char *)0) { + return ((int)0); + } else { + count_char = *regexp; + while (('0' <= count_char) && (count_char <= '9')) { + count = (10 * count) + (int)(count_char - '0'); + count_length++; + regexp++; + count_char = *regexp; + } + } + *countp = count; + return (count_length); +} + +static int +get_digit(const char *regexp) +{ + char digit; + + if (regexp == (char *)0) { + return ((int)-1); + } else { + digit = *regexp; + if (('0' <= digit) && (digit <= '9')) { + return ((int)(digit - '0')); + } else { + return ((int)-1); + } + } +} + +static int +get_wchar(wchar_t *wcharp, const char *regexp) +{ + int char_size; + + if (regexp == (char *)0) { + char_size = 0; + *wcharp = (wchar_t)((unsigned int)'\0'); + } else if (*regexp == '\0') { + char_size = 0; + *wcharp = (wchar_t)((unsigned int)*regexp); + } else if ((unsigned char)*regexp <= (unsigned char)0x7f) { + char_size = 1; + *wcharp = (wchar_t)((unsigned int)*regexp); + } else { + char_size = mbtowc(wcharp, regexp, MB_LEN_MAX); + } + return (char_size); +} + +static char * +pop_compilep(void) +{ + char *compilep; + + if (compilep_stackp >= &compilep_stack[STRINGP_STACK_SIZE]) { + return ((char *)0); + } else { + compilep = *compilep_stackp; + compilep_stackp++; + return (compilep); + } +} + +static char * +push_compilep(char *compilep) +{ + if (compilep_stackp <= &compilep_stack[0]) { + return ((char *)0); + } else { + compilep_stackp--; + *compilep_stackp = compilep; + return (compilep); + } +} + +static boolean_t +valid_range(wchar_t lower_char, wchar_t upper_char) +{ + return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && + !iswcntrl(lower_char) && !iswcntrl(upper_char) && + (lower_char < upper_char)) || + (((lower_char & WCHAR_CSMASK) == + (upper_char & WCHAR_CSMASK)) && + (lower_char < upper_char))); +} |