diff options
Diffstat (limited to 'third_party/pcre-7.4/pcre_internal.h')
| -rw-r--r-- | third_party/pcre-7.4/pcre_internal.h | 1117 | 
1 files changed, 1117 insertions, 0 deletions
| diff --git a/third_party/pcre-7.4/pcre_internal.h b/third_party/pcre-7.4/pcre_internal.h new file mode 100644 index 0000000..5fbb344 --- /dev/null +++ b/third_party/pcre-7.4/pcre_internal.h @@ -0,0 +1,1117 @@ +/************************************************* +*      Perl-Compatible Regular Expressions       * +*************************************************/ + + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + +                       Written by Philip Hazel +           Copyright (c) 1997-2007 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +    * Redistributions of source code must retain the above copyright notice, +      this list of conditions and the following disclaimer. + +    * Redistributions in binary form must reproduce the above copyright +      notice, this list of conditions and the following disclaimer in the +      documentation and/or other materials provided with the distribution. + +    * Neither the name of the University of Cambridge nor the names of its +      contributors may be used to endorse or promote products derived from +      this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This header contains definitions that are shared between the different +modules, but which are not relevant to the exported API. This includes some +functions whose names all begin with "_pcre_". */ + +#ifndef PCRE_INTERNAL_H +#define PCRE_INTERNAL_H + +/* Define DEBUG to get debugging output on stdout. */ + +#if 0 +#define DEBUG +#endif + +/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef +inline, and there are *still* stupid compilers about that don't like indented +pre-processor statements, or at least there were when I first wrote this. After +all, it had only been about 10 years then... + +It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so +be absolutely sure we get our version. */ + +#undef DPRINTF +#ifdef DEBUG +#define DPRINTF(p) printf p +#else +#define DPRINTF(p) /* Nothing */ +#endif + + +/* Standard C headers plus the external interface definition. The only time +setjmp and stdarg are used is when NO_RECURSE is set. */ + +#include <ctype.h> +#include <limits.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* When compiling a DLL for Windows, the exported symbols have to be declared +using some MS magic. I found some useful information on this web page: +http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the +information there, using __declspec(dllexport) without "extern" we have a +definition; with "extern" we have a declaration. The settings here override the +setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, +which is all that is needed for applications (they just import the symbols). We +use: + +  PCRE_EXP_DECL       for declarations +  PCRE_EXP_DEFN       for definitions of exported functions +  PCRE_EXP_DATA_DEFN  for definitions of exported variables + +The reason for the two DEFN macros is that in non-Windows environments, one +does not want to have "extern" before variable definitions because it leads to +compiler warnings. So we distinguish between functions and variables. In +Windows, the two should always be the same. + +The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, +which is an application, but needs to import this file in order to "peek" at +internals, can #include pcre.h first to get an application's-eye view. + +In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, +special-purpose environments) might want to stick other stuff in front of +exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and +PCRE_EXP_DATA_DEFN only if they are not already set. */ + +#ifndef PCRE_EXP_DECL +/*#  ifdef _WIN32 +#    ifndef PCRE_STATIC +#      define PCRE_EXP_DECL       extern __declspec(dllexport) +#      define PCRE_EXP_DEFN       __declspec(dllexport) +#      define PCRE_EXP_DATA_DEFN  __declspec(dllexport) +#    else +#      define PCRE_EXP_DECL       extern +#      define PCRE_EXP_DEFN +#      define PCRE_EXP_DATA_DEFN +#    endif +#  else*/ +#    ifdef __cplusplus +#      define PCRE_EXP_DECL       extern "C" +#    else +#      define PCRE_EXP_DECL       extern +#    endif +#    ifndef PCRE_EXP_DEFN +#      define PCRE_EXP_DEFN       PCRE_EXP_DECL +#    endif +#    ifndef PCRE_EXP_DATA_DEFN +#      define PCRE_EXP_DATA_DEFN +#    endif +#  endif +/*#endif*/ + +/* We need to have types that specify unsigned 16-bit and 32-bit integers. We +cannot determine these outside the compilation (e.g. by running a program as +part of "configure") because PCRE is often cross-compiled for use on other +systems. Instead we make use of the maximum sizes that are available at +preprocessor time in standard C environments. */ + +#if USHRT_MAX == 65535 +  typedef unsigned short pcre_uint16; +#elif UINT_MAX == 65535 +  typedef unsigned int pcre_uint16; +#else +  #error Cannot determine a type for 16-bit unsigned integers +#endif + +#if UINT_MAX == 4294967295 +  typedef unsigned int pcre_uint32; +#elif ULONG_MAX == 4294967295 +  typedef unsigned long int pcre_uint32; +#else +  #error Cannot determine a type for 32-bit unsigned integers +#endif + +/* All character handling must be done as unsigned characters. Otherwise there +are problems with top-bit-set characters and functions such as isspace(). +However, we leave the interface to the outside world as char *, because that +should make things easier for callers. We define a short type for unsigned char +to save lots of typing. I tried "uchar", but it causes problems on Digital +Unix, where it is defined in sys/types, so use "uschar" instead. */ + +typedef unsigned char uschar; + +/* This is an unsigned int value that no character can ever have. UTF-8 +characters only go up to 0x7fffffff (though Unicode doesn't go beyond +0x0010ffff). */ + +#define NOTACHAR 0xffffffff + +/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, +"any" and "anycrlf" at present). The following macros are used to package up +testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various +modules to indicate in which datablock the parameters exist, and what the +start/end of string field names are. */ + +#define NLTYPE_FIXED    0     /* Newline is a fixed length string */ +#define NLTYPE_ANY      1     /* Newline is any Unicode line ending */ +#define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */ + +/* This macro checks for a newline at the given position */ + +#define IS_NEWLINE(p) \ +  ((NLBLOCK->nltype != NLTYPE_FIXED)? \ +    ((p) < NLBLOCK->PSEND && \ +     _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ +       utf8)) \ +    : \ +    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ +     (p)[0] == NLBLOCK->nl[0] && \ +     (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \ +    ) \ +  ) + +/* This macro checks for a newline immediately preceding the given position */ + +#define WAS_NEWLINE(p) \ +  ((NLBLOCK->nltype != NLTYPE_FIXED)? \ +    ((p) > NLBLOCK->PSSTART && \ +     _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ +       &(NLBLOCK->nllen), utf8)) \ +    : \ +    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ +     (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ +     (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \ +    ) \ +  ) + +/* When PCRE is compiled as a C++ library, the subject pointer can be replaced +with a custom type. This makes it possible, for example, to allow pcre_exec() +to process subject strings that are discontinuous by using a smart pointer +class. It must always be possible to inspect all of the subject string in +pcre_exec() because of the way it backtracks. Two macros are required in the +normal case, for sign-unspecified and unsigned char pointers. The former is +used for the external interface and appears in pcre.h, which is why its name +must begin with PCRE_. */ + +#ifdef CUSTOM_SUBJECT_PTR +#define PCRE_SPTR CUSTOM_SUBJECT_PTR +#define USPTR CUSTOM_SUBJECT_PTR +#else +#define PCRE_SPTR const char * +#define USPTR const unsigned char * +#endif + + + +/* Include the public PCRE header and the definitions of UCP character property +values. */ + +#include "pcre.h" +#include "ucp.h" + +/* When compiling for use with the Virtual Pascal compiler, these functions +need to have their names changed. PCRE must be compiled with the -DVPCOMPAT +option on the command line. */ + +#ifdef VPCOMPAT +#define strlen(s)        _strlen(s) +#define strncmp(s1,s2,m) _strncmp(s1,s2,m) +#define memcmp(s,c,n)    _memcmp(s,c,n) +#define memcpy(d,s,n)    _memcpy(d,s,n) +#define memmove(d,s,n)   _memmove(d,s,n) +#define memset(s,c,n)    _memset(s,c,n) +#else  /* VPCOMPAT */ + +/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), +define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY +is set. Otherwise, include an emulating function for those systems that have +neither (there some non-Unix environments where this is the case). */ + +#ifndef HAVE_MEMMOVE +#undef  memmove        /* some systems may have a macro */ +#ifdef HAVE_BCOPY +#define memmove(a, b, c) bcopy(b, a, c) +#else  /* HAVE_BCOPY */ +static void * +pcre_memmove(void *d, const void *s, size_t n) +{ +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) +  { +  dest += n; +  src += n; +  for (i = 0; i < n; ++i) *(--dest) = *(--src); +  return (void *)dest; +  } +else +  { +  for (i = 0; i < n; ++i) *dest++ = *src++; +  return (void *)(dest - n); +  } +} +#define memmove(a, b, c) pcre_memmove(a, b, c) +#endif   /* not HAVE_BCOPY */ +#endif   /* not HAVE_MEMMOVE */ +#endif   /* not VPCOMPAT */ + + +/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored +in big-endian order) by default. These are used, for example, to link from the +start of a subpattern to its alternatives and its end. The use of 2 bytes per +offset limits the size of the compiled regex to around 64K, which is big enough +for almost everybody. However, I received a request for an even bigger limit. +For this reason, and also to make the code easier to maintain, the storing and +loading of offsets from the byte string is now handled by the macros that are +defined here. + +The macros are controlled by the value of LINK_SIZE. This defaults to 2 in +the config.h file, but can be overridden by using -D on the command line. This +is automated on Unix systems via the "configure" command. */ + +#if LINK_SIZE == 2 + +#define PUT(a,n,d)   \ +  (a[n] = (d) >> 8), \ +  (a[(n)+1] = (d) & 255) + +#define GET(a,n) \ +  (((a)[n] << 8) | (a)[(n)+1]) + +#define MAX_PATTERN_SIZE (1 << 16) + + +#elif LINK_SIZE == 3 + +#define PUT(a,n,d)       \ +  (a[n] = (d) >> 16),    \ +  (a[(n)+1] = (d) >> 8), \ +  (a[(n)+2] = (d) & 255) + +#define GET(a,n) \ +  (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) + +#define MAX_PATTERN_SIZE (1 << 24) + + +#elif LINK_SIZE == 4 + +#define PUT(a,n,d)        \ +  (a[n] = (d) >> 24),     \ +  (a[(n)+1] = (d) >> 16), \ +  (a[(n)+2] = (d) >> 8),  \ +  (a[(n)+3] = (d) & 255) + +#define GET(a,n) \ +  (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) + +#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */ + + +#else +#error LINK_SIZE must be either 2, 3, or 4 +#endif + + +/* Convenience macro defined in terms of the others */ + +#define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE + + +/* PCRE uses some other 2-byte quantities that do not change when the size of +offsets changes. There are used for repeat counts and for other things such as +capturing parenthesis numbers in back references. */ + +#define PUT2(a,n,d)   \ +  a[n] = (d) >> 8; \ +  a[(n)+1] = (d) & 255 + +#define GET2(a,n) \ +  (((a)[n] << 8) | (a)[(n)+1]) + +#define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2 + + +/* When UTF-8 encoding is being used, a character is no longer just a single +byte. The macros for character handling generate simple sequences when used in +byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should +never be called in byte mode. To make sure it can never even appear when UTF-8 +support is omitted, we don't even define it. */ + +#ifndef SUPPORT_UTF8 +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARTEST(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARINCTEST(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; +/* #define BACKCHAR(eptr) */ + +#else   /* SUPPORT_UTF8 */ + +/* Get the next UTF-8 character, not advancing the pointer. This is called when +we know we are in UTF-8 mode. */ + +#define GETCHAR(c, eptr) \ +  c = *eptr; \ +  if (c >= 0xc0) \ +    { \ +    int gcii; \ +    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \ +    int gcss = 6*gcaa; \ +    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ +    for (gcii = 1; gcii <= gcaa; gcii++) \ +      { \ +      gcss -= 6; \ +      c |= (eptr[gcii] & 0x3f) << gcss; \ +      } \ +    } + +/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ +  c = *eptr; \ +  if (utf8 && c >= 0xc0) \ +    { \ +    int gcii; \ +    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \ +    int gcss = 6*gcaa; \ +    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ +    for (gcii = 1; gcii <= gcaa; gcii++) \ +      { \ +      gcss -= 6; \ +      c |= (eptr[gcii] & 0x3f) << gcss; \ +      } \ +    } + +/* Get the next UTF-8 character, advancing the pointer. This is called when we +know we are in UTF-8 mode. */ + +#define GETCHARINC(c, eptr) \ +  c = *eptr++; \ +  if (c >= 0xc0) \ +    { \ +    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \ +    int gcss = 6*gcaa; \ +    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ +    while (gcaa-- > 0) \ +      { \ +      gcss -= 6; \ +      c |= (*eptr++ & 0x3f) << gcss; \ +      } \ +    } + +/* Get the next character, testing for UTF-8 mode, and advancing the pointer */ + +#define GETCHARINCTEST(c, eptr) \ +  c = *eptr++; \ +  if (utf8 && c >= 0xc0) \ +    { \ +    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \ +    int gcss = 6*gcaa; \ +    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ +    while (gcaa-- > 0) \ +      { \ +      gcss -= 6; \ +      c |= (*eptr++ & 0x3f) << gcss; \ +      } \ +    } + +/* Get the next UTF-8 character, not advancing the pointer, incrementing length +if there are extra bytes. This is called when we know we are in UTF-8 mode. */ + +#define GETCHARLEN(c, eptr, len) \ +  c = *eptr; \ +  if (c >= 0xc0) \ +    { \ +    int gcii; \ +    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \ +    int gcss = 6*gcaa; \ +    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ +    for (gcii = 1; gcii <= gcaa; gcii++) \ +      { \ +      gcss -= 6; \ +      c |= (eptr[gcii] & 0x3f) << gcss; \ +      } \ +    len += gcaa; \ +    } + +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-8 mode - we don't put a test within the macro +because almost all calls are already within a block of UTF-8 only code. */ + +#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- + +#endif + + +/* In case there is no definition of offsetof() provided - though any proper +Standard C system should have one. */ + +#ifndef offsetof +#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) +#endif + + +/* These are the public options that can change during matching. */ + +#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) + +/* Private flags containing information about the compiled regex. They used to +live at the top end of the options word, but that got almost full, so now they +are in a 16-bit flags word. */ + +#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */ +#define PCRE_FIRSTSET      0x0002  /* first_byte is set */ +#define PCRE_REQCHSET      0x0004  /* req_byte is set */ +#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */ +#define PCRE_JCHANGED      0x0010  /* j option used in regex */ +#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */ + +/* Options for the "extra" block produced by pcre_study(). */ + +#define PCRE_STUDY_MAPPED   0x01     /* a map of starting chars exists */ + +/* Masks for identifying the public options that are permitted at compile +time, run time, or study time, respectively. */ + +#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ +                           PCRE_NEWLINE_ANYCRLF) + +#define PUBLIC_OPTIONS \ +  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ +   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ +   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ +   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE) + +#define PUBLIC_EXEC_OPTIONS \ +  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ +   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE) + +#define PUBLIC_DFA_EXEC_OPTIONS \ +  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ +   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \ +   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE) + +#define PUBLIC_STUDY_OPTIONS 0   /* None defined */ + +/* Magic number to provide a small check against being handed junk. Also used +to detect whether a pattern was compiled on a host of different endianness. */ + +#define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */ + +/* Negative values for the firstchar and reqchar variables */ + +#define REQ_UNSET (-2) +#define REQ_NONE  (-1) + +/* The maximum remaining length of subject we are prepared to search for a +req_byte match. */ + +#define REQ_BYTE_MAX 1000 + +/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a +variable-length repeat, or a anything other than literal characters. */ + +#define REQ_CASELESS 0x0100    /* indicates caselessness */ +#define REQ_VARY     0x0200    /* reqbyte followed non-literal item */ + +/* Miscellaneous definitions */ + +typedef int BOOL; + +#define FALSE   0 +#define TRUE    1 + +/* Escape items that are just an encoding of a particular data value. */ + +#ifndef ESC_e +#define ESC_e 27 +#endif + +#ifndef ESC_f +#define ESC_f '\f' +#endif + +#ifndef ESC_n +#define ESC_n '\n' +#endif + +#ifndef ESC_r +#define ESC_r '\r' +#endif + +/* We can't officially use ESC_t because it is a POSIX reserved identifier +(presumably because of all the others like size_t). */ + +#ifndef ESC_tee +#define ESC_tee '\t' +#endif + +/* Codes for different types of Unicode property */ + +#define PT_ANY        0    /* Any property - matches all chars */ +#define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */ +#define PT_GC         2    /* General characteristic (e.g. L) */ +#define PT_PC         3    /* Particular characteristic (e.g. Lu) */ +#define PT_SC         4    /* Script (e.g. Han) */ + +/* Flag bits and data types for the extended class (OP_XCLASS) for classes that +contain UTF-8 characters with values greater than 255. */ + +#define XCL_NOT    0x01    /* Flag: this is a negative class */ +#define XCL_MAP    0x02    /* Flag: a 32-byte map is present */ + +#define XCL_END       0    /* Marks end of individual items */ +#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */ +#define XCL_RANGE     2    /* A range (two multibyte chars) follows */ +#define XCL_PROP      3    /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */ + +/* These are escaped items that aren't just an encoding of a particular data +value such as \n. They must have non-zero values, as check_escape() returns +their negation. Also, they must appear in the same order as in the opcode +definitions below, up to ESC_z. There's a dummy for OP_ANY because it +corresponds to "." rather than an escape sequence. The final one must be +ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc). +There are two tests in the code for an escape greater than ESC_b and less than +ESC_Z to detect the types that may be repeated. These are the types that +consume characters. If any new escapes are put in between that don't consume a +character, that code will have to change. */ + +enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, +       ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, +       ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF }; + + +/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to +OP_EOD must correspond in order to the list of escapes immediately above. + +*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions +that follow must also be updated to match. There is also a table called +"coptable" in pcre_dfa_exec.c that must be updated. */ + +enum { +  OP_END,            /* 0 End of pattern */ + +  /* Values corresponding to backslashed metacharacters */ + +  OP_SOD,            /* 1 Start of data: \A */ +  OP_SOM,            /* 2 Start of match (subject + offset): \G */ +  OP_SET_SOM,        /* 3 Set start of match (\K) */ +  OP_NOT_WORD_BOUNDARY,  /*  4 \B */ +  OP_WORD_BOUNDARY,      /*  5 \b */ +  OP_NOT_DIGIT,          /*  6 \D */ +  OP_DIGIT,              /*  7 \d */ +  OP_NOT_WHITESPACE,     /*  8 \S */ +  OP_WHITESPACE,         /*  9 \s */ +  OP_NOT_WORDCHAR,       /* 10 \W */ +  OP_WORDCHAR,           /* 11 \w */ +  OP_ANY,            /* 12 Match any character */ +  OP_ANYBYTE,        /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */ +  OP_NOTPROP,        /* 14 \P (not Unicode property) */ +  OP_PROP,           /* 15 \p (Unicode property) */ +  OP_ANYNL,          /* 16 \R (any newline sequence) */ +  OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */ +  OP_HSPACE,         /* 18 \h (horizontal whitespace) */ +  OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */ +  OP_VSPACE,         /* 20 \v (vertical whitespace) */ +  OP_EXTUNI,         /* 21 \X (extended Unicode sequence */ +  OP_EODN,           /* 22 End of data or \n at end of data: \Z. */ +  OP_EOD,            /* 23 End of data: \z */ + +  OP_OPT,            /* 24 Set runtime options */ +  OP_CIRC,           /* 25 Start of line - varies with multiline switch */ +  OP_DOLL,           /* 26 End of line - varies with multiline switch */ +  OP_CHAR,           /* 27 Match one character, casefully */ +  OP_CHARNC,         /* 28 Match one character, caselessly */ +  OP_NOT,            /* 29 Match one character, not the following one */ + +  OP_STAR,           /* 30 The maximizing and minimizing versions of */ +  OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */ +  OP_PLUS,           /* 32 the minimizing one second. */ +  OP_MINPLUS,        /* 33 This first set applies to single characters.*/ +  OP_QUERY,          /* 34 */ +  OP_MINQUERY,       /* 35 */ + +  OP_UPTO,           /* 36 From 0 to n matches */ +  OP_MINUPTO,        /* 37 */ +  OP_EXACT,          /* 38 Exactly n matches */ + +  OP_POSSTAR,        /* 39 Possessified star */ +  OP_POSPLUS,        /* 40 Possessified plus */ +  OP_POSQUERY,       /* 41 Posesssified query */ +  OP_POSUPTO,        /* 42 Possessified upto */ + +  OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */ +  OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */ +  OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */ +  OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */ +  OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */ +  OP_NOTMINQUERY,    /* 48 */ + +  OP_NOTUPTO,        /* 49 From 0 to n matches */ +  OP_NOTMINUPTO,     /* 50 */ +  OP_NOTEXACT,       /* 51 Exactly n matches */ + +  OP_NOTPOSSTAR,     /* 52 Possessified versions */ +  OP_NOTPOSPLUS,     /* 53 */ +  OP_NOTPOSQUERY,    /* 54 */ +  OP_NOTPOSUPTO,     /* 55 */ + +  OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */ +  OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */ +  OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */ +  OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */ +  OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */ +  OP_TYPEMINQUERY,   /* 61 */ + +  OP_TYPEUPTO,       /* 62 From 0 to n matches */ +  OP_TYPEMINUPTO,    /* 63 */ +  OP_TYPEEXACT,      /* 64 Exactly n matches */ + +  OP_TYPEPOSSTAR,    /* 65 Possessified versions */ +  OP_TYPEPOSPLUS,    /* 66 */ +  OP_TYPEPOSQUERY,   /* 67 */ +  OP_TYPEPOSUPTO,    /* 68 */ + +  OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */ +  OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */ +  OP_CRPLUS,         /* 71 the minimizing one second. These codes must */ +  OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */ +  OP_CRQUERY,        /* 73 These are for character classes and back refs */ +  OP_CRMINQUERY,     /* 74 */ +  OP_CRRANGE,        /* 75 These are different to the three sets above. */ +  OP_CRMINRANGE,     /* 76 */ + +  OP_CLASS,          /* 77 Match a character class, chars < 256 only */ +  OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative +                           class - the difference is relevant only when a UTF-8 +                           character > 255 is encountered. */ + +  OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the +                           class. This does both positive and negative. */ + +  OP_REF,            /* 80 Match a back reference */ +  OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */ +  OP_CALLOUT,        /* 82 Call out to external function if provided */ + +  OP_ALT,            /* 83 Start of alternation */ +  OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */ +  OP_KETRMAX,        /* 85 These two must remain together and in this */ +  OP_KETRMIN,        /* 86 order. They are for groups the repeat for ever. */ + +  /* The assertions must come before BRA, CBRA, ONCE, and COND.*/ + +  OP_ASSERT,         /* 87 Positive lookahead */ +  OP_ASSERT_NOT,     /* 88 Negative lookahead */ +  OP_ASSERTBACK,     /* 89 Positive lookbehind */ +  OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */ +  OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */ + +  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first, +  as there's a test for >= ONCE for a subpattern that isn't an assertion. */ + +  OP_ONCE,           /* 92 Atomic group */ +  OP_BRA,            /* 93 Start of non-capturing bracket */ +  OP_CBRA,           /* 94 Start of capturing bracket */ +  OP_COND,           /* 95 Conditional group */ + +  /* These three must follow the previous three, in the same order. There's a +  check for >= SBRA to distinguish the two sets. */ + +  OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */ +  OP_SCBRA,          /* 97 Start of capturing bracket, check empty */ +  OP_SCOND,          /* 98 Conditional group, check empty */ + +  OP_CREF,           /* 99 Used to hold a capture number as condition */ +  OP_RREF,           /* 100 Used to hold a recursion number as condition */ +  OP_DEF,            /* 101 The DEFINE condition */ + +  OP_BRAZERO,        /* 102 These two must remain together and in this */ +  OP_BRAMINZERO,     /* 103 order. */ + +  /* These are backtracking control verbs */ + +  OP_PRUNE,          /* 104 */ +  OP_SKIP,           /* 105 */ +  OP_THEN,           /* 106 */ +  OP_COMMIT,         /* 107 */ + +  /* These are forced failure and success verbs */ + +  OP_FAIL,           /* 108 */ +  OP_ACCEPT          /* 109 */ +}; + + +/* This macro defines textual names for all the opcodes. These are used only +for debugging. The macro is referenced only in pcre_printint.c. */ + +#define OP_NAME_LIST \ +  "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \ +  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \ +  "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \ +  "extuni",  "\\Z", "\\z",                                        \ +  "Opt", "^", "$", "char", "charnc", "not",                       \ +  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \ +  "*+","++", "?+", "{",                                           \ +  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \ +  "*+","++", "?+", "{",                                           \ +  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \ +  "*+","++", "?+", "{",                                           \ +  "*", "*?", "+", "+?", "?", "??", "{", "{",                      \ +  "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \ +  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \ +  "AssertB", "AssertB not", "Reverse",                            \ +  "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \ +  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \ +  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT" + + +/* This macro defines the length of fixed length operations in the compiled +regex. The lengths are used when searching for specific things, and also in the +debugging printing of a compiled regex. We use a macro so that it can be +defined close to the definitions of the opcodes themselves. + +As things have been extended, some of these are no longer fixed lenths, but are +minima instead. For example, the length of a single-character repeat may vary +in UTF-8 mode. The code that uses this table must know about such things. */ + +#define OP_LENGTHS \ +  1,                             /* End                                    */ \ +  1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \ +  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \ +  1, 1,                          /* Any, Anybyte                           */ \ +  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \ +  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \ +  1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \ +  2,                             /* Char  - the minimum length             */ \ +  2,                             /* Charnc  - the minimum length           */ \ +  2,                             /* not                                    */ \ +  /* Positive single-char repeats                            ** These are  */ \ +  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \ +  4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \ +  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \ +  /* Negative single-char repeats - only for chars < 256                   */ \ +  2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \ +  4, 4, 4,                       /* NOT upto, minupto, exact               */ \ +  2, 2, 2, 4,                    /* Possessive *, +, ?, upto               */ \ +  /* Positive type repeats                                                 */ \ +  2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \ +  4, 4, 4,                       /* Type upto, minupto, exact              */ \ +  2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \ +  /* Character class & ref repeats                                         */ \ +  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \ +  5, 5,                          /* CRRANGE, CRMINRANGE                    */ \ + 33,                             /* CLASS                                  */ \ + 33,                             /* NCLASS                                 */ \ +  0,                             /* XCLASS - variable length               */ \ +  3,                             /* REF                                    */ \ +  1+LINK_SIZE,                   /* RECURSE                                */ \ +  2+2*LINK_SIZE,                 /* CALLOUT                                */ \ +  1+LINK_SIZE,                   /* Alt                                    */ \ +  1+LINK_SIZE,                   /* Ket                                    */ \ +  1+LINK_SIZE,                   /* KetRmax                                */ \ +  1+LINK_SIZE,                   /* KetRmin                                */ \ +  1+LINK_SIZE,                   /* Assert                                 */ \ +  1+LINK_SIZE,                   /* Assert not                             */ \ +  1+LINK_SIZE,                   /* Assert behind                          */ \ +  1+LINK_SIZE,                   /* Assert behind not                      */ \ +  1+LINK_SIZE,                   /* Reverse                                */ \ +  1+LINK_SIZE,                   /* ONCE                                   */ \ +  1+LINK_SIZE,                   /* BRA                                    */ \ +  3+LINK_SIZE,                   /* CBRA                                   */ \ +  1+LINK_SIZE,                   /* COND                                   */ \ +  1+LINK_SIZE,                   /* SBRA                                   */ \ +  3+LINK_SIZE,                   /* SCBRA                                  */ \ +  1+LINK_SIZE,                   /* SCOND                                  */ \ +  3,                             /* CREF                                   */ \ +  3,                             /* RREF                                   */ \ +  1,                             /* DEF                                    */ \ +  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \ +  1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \ +  1, 1                           /* FAIL, ACCEPT                           */ + + +/* A magic value for OP_RREF to indicate the "any recursion" condition. */ + +#define RREF_ANY  0xffff + +/* Error code numbers. They are given names so that they can more easily be +tracked. */ + +enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9, +       ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, +       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, +       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, +       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, +       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, +       ERR60, ERR61 }; + +/* The real format of the start of the pcre block; the index of names and the +code vector run on as long as necessary after the end. We store an explicit +offset to the name table so that if a regex is compiled on one host, saved, and +then run on another where the size of pointers is different, all might still +be well. For the case of compiled-on-4 and run-on-8, we include an extra +pointer that is always NULL. For future-proofing, a few dummy fields were +originally included - even though you can never get this planning right - but +there is only one left now. + +NOTE NOTE NOTE: +Because people can now save and re-use compiled patterns, any additions to this +structure should be made at the end, and something earlier (e.g. a new +flag in the options or one of the dummy fields) should indicate that the new +fields are present. Currently PCRE always sets the dummy fields to zero. +NOTE NOTE NOTE: +*/ + +typedef struct real_pcre { +  pcre_uint32 magic_number; +  pcre_uint32 size;               /* Total that was malloced */ +  pcre_uint32 options;            /* Public options */ +  pcre_uint16 flags;              /* Private flags */ +  pcre_uint16 dummy1;             /* For future use */ +  pcre_uint16 top_bracket; +  pcre_uint16 top_backref; +  pcre_uint16 first_byte; +  pcre_uint16 req_byte; +  pcre_uint16 name_table_offset;  /* Offset to name table that follows */ +  pcre_uint16 name_entry_size;    /* Size of any name items */ +  pcre_uint16 name_count;         /* Number of name items */ +  pcre_uint16 ref_count;          /* Reference count */ + +  const unsigned char *tables;    /* Pointer to tables or NULL for std */ +  const unsigned char *nullpad;   /* NULL padding */ +} real_pcre; + +/* The format of the block used to store data from pcre_study(). The same +remark (see NOTE above) about extending this structure applies. */ + +typedef struct pcre_study_data { +  pcre_uint32 size;               /* Total that was malloced */ +  pcre_uint32 options; +  uschar start_bits[32]; +} pcre_study_data; + +/* Structure for passing "static" information around between the functions +doing the compiling, so that they are thread-safe. */ + +typedef struct compile_data { +  const uschar *lcc;            /* Points to lower casing table */ +  const uschar *fcc;            /* Points to case-flipping table */ +  const uschar *cbits;          /* Points to character type table */ +  const uschar *ctypes;         /* Points to table of type maps */ +  const uschar *start_workspace;/* The start of working space */ +  const uschar *start_code;     /* The start of the compiled code */ +  const uschar *start_pattern;  /* The start of the pattern */ +  const uschar *end_pattern;    /* The end of the pattern */ +  uschar *hwm;                  /* High watermark of workspace */ +  uschar *name_table;           /* The name/number table */ +  int  names_found;             /* Number of entries so far */ +  int  name_entry_size;         /* Size of each entry */ +  int  bracount;                /* Count of capturing parens */ +  int  top_backref;             /* Maximum back reference */ +  unsigned int backref_map;     /* Bitmap of low back refs */ +  int  external_options;        /* External (initial) options */ +  int  external_flags;          /* External flag bits to be set */ +  int  req_varyopt;             /* "After variable item" flag for reqbyte */ +  BOOL had_accept;              /* (*ACCEPT) encountered */ +  int  nltype;                  /* Newline type */ +  int  nllen;                   /* Newline string length */ +  uschar nl[4];                 /* Newline string when fixed length */ +} compile_data; + +/* Structure for maintaining a chain of pointers to the currently incomplete +branches, for testing for left recursion. */ + +typedef struct branch_chain { +  struct branch_chain *outer; +  uschar *current; +} branch_chain; + +/* Structure for items in a linked list that represents an explicit recursive +call within the pattern. */ + +typedef struct recursion_info { +  struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ +  int group_num;                /* Number of group that was called */ +  const uschar *after_call;     /* "Return value": points after the call in the expr */ +  USPTR save_start;             /* Old value of mstart */ +  int *offset_save;             /* Pointer to start of saved offsets */ +  int saved_max;                /* Number of saved offsets */ +} recursion_info; + +/* Structure for building a chain of data for holding the values of the subject +pointer at the start of each subpattern, so as to detect when an empty string +has been matched by a subpattern - to break infinite loops. */ + +typedef struct eptrblock { +  struct eptrblock *epb_prev; +  USPTR epb_saved_eptr; +} eptrblock; + + +/* Structure for passing "static" information around between the functions +doing traditional NFA matching, so that they are thread-safe. */ + +typedef struct match_data { +  unsigned long int match_call_count;      /* As it says */ +  unsigned long int match_limit;           /* As it says */ +  unsigned long int match_limit_recursion; /* As it says */ +  int   *offset_vector;         /* Offset vector */ +  int    offset_end;            /* One past the end */ +  int    offset_max;            /* The maximum usable for return data */ +  int    nltype;                /* Newline type */ +  int    nllen;                 /* Newline string length */ +  uschar nl[4];                 /* Newline string when fixed */ +  const uschar *lcc;            /* Points to lower casing table */ +  const uschar *ctypes;         /* Points to table of type maps */ +  BOOL   offset_overflow;       /* Set if too many extractions */ +  BOOL   notbol;                /* NOTBOL flag */ +  BOOL   noteol;                /* NOTEOL flag */ +  BOOL   utf8;                  /* UTF8 flag */ +  BOOL   endonly;               /* Dollar not before final \n */ +  BOOL   notempty;              /* Empty string match not wanted */ +  BOOL   partial;               /* PARTIAL flag */ +  BOOL   hitend;                /* Hit the end of the subject at some point */ +  BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */ +  const uschar *start_code;     /* For use when recursing */ +  USPTR  start_subject;         /* Start of the subject string */ +  USPTR  end_subject;           /* End of the subject string */ +  USPTR  start_match_ptr;       /* Start of matched string */ +  USPTR  end_match_ptr;         /* Subject position at end match */ +  int    end_offset_top;        /* Highwater mark at end of match */ +  int    capture_last;          /* Most recent capture number */ +  int    start_offset;          /* The start offset value */ +  eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */ +  int    eptrn;                 /* Next free eptrblock */ +  recursion_info *recursive;    /* Linked list of recursion data */ +  void  *callout_data;          /* To pass back to callouts */ +} match_data; + +/* A similar structure is used for the same purpose by the DFA matching +functions. */ + +typedef struct dfa_match_data { +  const uschar *start_code;     /* Start of the compiled pattern */ +  const uschar *start_subject;  /* Start of the subject string */ +  const uschar *end_subject;    /* End of subject string */ +  const uschar *tables;         /* Character tables */ +  int   moptions;               /* Match options */ +  int   poptions;               /* Pattern options */ +  int    nltype;                /* Newline type */ +  int    nllen;                 /* Newline string length */ +  uschar nl[4];                 /* Newline string when fixed */ +  void  *callout_data;          /* To pass back to callouts */ +} dfa_match_data; + +/* Bit definitions for entries in the pcre_ctypes table. */ + +#define ctype_space   0x01 +#define ctype_letter  0x02 +#define ctype_digit   0x04 +#define ctype_xdigit  0x08 +#define ctype_word    0x10   /* alphameric or '_' */ +#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */ + +/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set +of bits for a class map. Some classes are built by combining these tables. */ + +#define cbit_space     0      /* [:space:] or \s */ +#define cbit_xdigit   32      /* [:xdigit:] */ +#define cbit_digit    64      /* [:digit:] or \d */ +#define cbit_upper    96      /* [:upper:] */ +#define cbit_lower   128      /* [:lower:] */ +#define cbit_word    160      /* [:word:] or \w */ +#define cbit_graph   192      /* [:graph:] */ +#define cbit_print   224      /* [:print:] */ +#define cbit_punct   256      /* [:punct:] */ +#define cbit_cntrl   288      /* [:cntrl:] */ +#define cbit_length  320      /* Length of the cbits table */ + +/* Offsets of the various tables from the base tables pointer, and +total length. */ + +#define lcc_offset      0 +#define fcc_offset    256 +#define cbits_offset  512 +#define ctypes_offset (cbits_offset + cbit_length) +#define tables_length (ctypes_offset + 256) + +/* Layout of the UCP type table that translates property names into types and +codes. Each entry used to point directly to a name, but to reduce the number of +relocations in shared libraries, it now has an offset into a single string +instead. */ + +typedef struct { +  pcre_uint16 name_offset; +  pcre_uint16 type; +  pcre_uint16 value; +} ucp_type_table; + + +/* Internal shared data tables. These are tables that are used by more than one +of the exported public functions. They have to be "external" in the C sense, +but are not part of the PCRE public API. The data for these tables is in the +pcre_tables.c module. */ + +extern const int    _pcre_utf8_table1[]; +extern const int    _pcre_utf8_table2[]; +extern const int    _pcre_utf8_table3[]; +extern const uschar _pcre_utf8_table4[]; + +extern const int    _pcre_utf8_table1_size; + +extern const char   _pcre_utt_names[]; +extern const ucp_type_table _pcre_utt[]; +extern const int _pcre_utt_size; + +extern const uschar _pcre_default_tables[]; + +extern const uschar _pcre_OP_lengths[]; + + +/* Internal shared functions. These are functions that are used by more than +one of the exported public functions. They have to be "external" in the C +sense, but are not part of the PCRE public API. */ + +extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *, +                      int *, BOOL); +extern int          _pcre_ord2utf8(int, uschar *); +extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *, +                      const pcre_study_data *, pcre_study_data *); +extern int          _pcre_ucp_findprop(const unsigned int, int *, int *); +extern unsigned int _pcre_ucp_othercase(const unsigned int); +extern int          _pcre_valid_utf8(const uschar *, int); +extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *, +                      int *, BOOL); +extern BOOL         _pcre_xclass(int, const uschar *); + +#endif + +/* End of pcre_internal.h */ | 
