diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcregrep.c')
-rw-r--r-- | ext/pcre/pcrelib/pcregrep.c | 163 |
1 files changed, 126 insertions, 37 deletions
diff --git a/ext/pcre/pcrelib/pcregrep.c b/ext/pcre/pcrelib/pcregrep.c index e5ceec3ac..f14c973cb 100644 --- a/ext/pcre/pcrelib/pcregrep.c +++ b/ext/pcre/pcrelib/pcregrep.c @@ -6,7 +6,7 @@ its pattern matching. On a Unix or Win32 system it can recurse into directories. - Copyright (c) 1997-2006 University of Cambridge + Copyright (c) 1997-2007 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -37,6 +37,10 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + #include <ctype.h> #include <locale.h> #include <stdio.h> @@ -46,17 +50,17 @@ POSSIBILITY OF SUCH DAMAGE. #include <sys/types.h> #include <sys/stat.h> -#include <unistd.h> +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif -#include "config.h" -#include "pcre.h" +#include <pcre.h> #define FALSE 0 #define TRUE 1 typedef int BOOL; -#define VERSION "4.4 29-Nov-2006" #define MAX_PATTERN_COUNT 100 #if BUFSIZ > 8192 @@ -84,7 +88,7 @@ enum { DEE_READ, DEE_SKIP }; /* Line ending types */ -enum { EL_LF, EL_CR, EL_CRLF, EL_ANY }; +enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; @@ -117,8 +121,8 @@ static char *locale = NULL; static const unsigned char *pcretables = NULL; static int pattern_count = 0; -static pcre **pattern_list; -static pcre_extra **hints_list; +static pcre **pattern_list = NULL; +static pcre_extra **hints_list = NULL; static char *include_pattern = NULL; static char *exclude_pattern = NULL; @@ -192,7 +196,7 @@ static option_item optionlist[] = { { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, - { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" }, + { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, @@ -222,7 +226,7 @@ static const char *prefix[] = { static const char *suffix[] = { "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; -/* UTF-8 tables - used only when the newline setting is "all". */ +/* UTF-8 tables - used only when the newline setting is "any". */ const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; @@ -244,7 +248,7 @@ although at present the only ones are for Unix, Win32, and for "no support". */ /************* Directory scanning in Unix ***********/ -#if IS_UNIX +#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H #include <sys/types.h> #include <sys/stat.h> #include <dirent.h> @@ -276,7 +280,7 @@ for (;;) if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) return dent->d_name; } -return NULL; /* Keep compiler happy; never executed */ +/* Control never reaches here */ } static void @@ -314,7 +318,7 @@ Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES when it did not exist. */ -#elif HAVE_WIN32API +#elif HAVE_WINDOWS_H #ifndef STRICT # define STRICT @@ -436,8 +440,8 @@ FALSE; typedef void directory_type; int isdirectory(char *filename) { return 0; } -directory_type * opendirectory(char *filename) {} -char *readdirectory(directory_type *dir) {} +directory_type * opendirectory(char *filename) { return (directory_type*)0;} +char *readdirectory(directory_type *dir) { return (char*)0;} void closedirectory(directory_type *dir) {} @@ -461,7 +465,7 @@ return FALSE; -#if ! HAVE_STRERROR +#ifndef HAVE_STRERROR /************************************************* * Provide strerror() for non-ANSI libraries * *************************************************/ @@ -541,6 +545,50 @@ switch(endlinetype) } break; + case EL_ANYCRLF: + while (p < endptr) + { + int extra = 0; + register int c = *((unsigned char *)p); + + if (utf8 && c >= 0xc0) + { + int gcii, gcss; + extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + gcss = 6*extra; + c = (c & utf8_table3[extra]) << gcss; + for (gcii = 1; gcii <= extra; gcii++) + { + gcss -= 6; + c |= (p[gcii] & 0x3f) << gcss; + } + } + + p += 1 + extra; + + switch (c) + { + case 0x0a: /* LF */ + *lenptr = 1; + return p; + + case 0x0d: /* CR */ + if (p < endptr && *p == 0x0a) + { + *lenptr = 2; + p++; + } + else *lenptr = 1; + return p; + + default: + break; + } + } /* End of loop for ANYCRLF case */ + + *lenptr = 0; /* Must have hit the end */ + return endptr; + case EL_ANY: while (p < endptr) { @@ -639,6 +687,7 @@ switch(endlinetype) return p; /* But control should never get here */ case EL_ANY: + case EL_ANYCRLF: if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; if (utf8) while ((*p & 0xc0) == 0x80) p--; @@ -667,7 +716,17 @@ switch(endlinetype) } else c = *((unsigned char *)pp); - switch (c) + if (endlinetype == EL_ANYCRLF) switch (c) + { + case 0x0a: /* LF */ + case 0x0d: /* CR */ + return p; + + default: + break; + } + + else switch (c) { case 0x0a: /* LF */ case 0x0b: /* VT */ @@ -1188,7 +1247,8 @@ if ((sep = isdirectory(pathname)) != 0) while ((nextfile = readdirectory(dir)) != NULL) { int frc, blen; - blen = slprintf(buffer, sizeof(buffer), "%.512s%c%.128s", pathname, sep, nextfile); + sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); + blen = strlen(buffer); if (exclude_compiled != NULL && pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0) @@ -1280,7 +1340,7 @@ for (op = optionlist; op->one_char != 0; op++) { int n; char s[4]; - if (op->one_char > 0) snprintf(s, sizeof(s), "-%c,", op->one_char); else strcpy(s, " "); + if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); printf(" %s --%s%n", s, op->long_name, &n); n = 30 - n; if (n < 1) n = 1; @@ -1327,8 +1387,7 @@ switch(letter) case 'x': process_options |= PO_LINE_MATCH; break; case 'V': - fprintf(stderr, "pcregrep version %s using ", VERSION); - fprintf(stderr, "PCRE version %s\n", pcre_version()); + fprintf(stderr, "pcregrep version %s\n", pcre_version()); exit(0); break; @@ -1354,7 +1413,7 @@ ordin(int n) { static char buffer[8]; char *p = buffer; -snprintf(p, sizeof(buffer), "%d", n); +sprintf(p, "%d", n); while (*p != 0) p++; switch (n%10) { @@ -1400,11 +1459,15 @@ if (pattern_count >= MAX_PATTERN_COUNT) return FALSE; } -snprintf(buffer, sizeof(buffer), "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, +sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, suffix[process_options]); pattern_list[pattern_count] = pcre_compile(buffer, options, &error, &errptr, pcretables); -if (pattern_list[pattern_count++] != NULL) return TRUE; +if (pattern_list[pattern_count] != NULL) + { + pattern_count++; + return TRUE; + } /* Handle compile errors */ @@ -1462,7 +1525,7 @@ if ((process_options & PO_FIXED_STRINGS) != 0) char *p = end_of_line(pattern, eop, &ellength); if (ellength == 0) return compile_single_pattern(pattern, options, filename, count); - snprintf(buffer, sizeof(buffer), "%.*s", p - pattern - ellength, pattern); + sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern); pattern = p; if (!compile_single_pattern(buffer, options, filename, count)) return FALSE; @@ -1486,6 +1549,7 @@ int i, j; int rc = 1; int pcre_options = 0; int cmd_pattern_count = 0; +int hint_count = 0; int errptr; BOOL only_one_at_top; char *patterns[MAX_PATTERN_COUNT]; @@ -1503,6 +1567,7 @@ switch(i) case '\r': newline = (char *)"cr"; break; case ('\r' << 8) | '\n': newline = (char *)"crlf"; break; case -1: newline = (char *)"any"; break; + case -2: newline = (char *)"anycrlf"; break; } /* Process the options */ @@ -1578,9 +1643,9 @@ for (i = 1; i < argc; i++) char buff1[24]; char buff2[24]; int baselen = opbra - op->long_name; - snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name); - snprintf(buff2, sizeof(buff2), "%s%.*s", buff1, strlen(op->long_name) - baselen - 2, - opbra + 1); + sprintf(buff1, "%.*s", baselen, op->long_name); + sprintf(buff2, "%s%.*s", buff1, + (int)strlen(op->long_name) - baselen - 2, opbra + 1); if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0) break; } @@ -1810,6 +1875,11 @@ else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) pcre_options |= PCRE_NEWLINE_ANY; endlinetype = EL_ANY; } +else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) + { + pcre_options |= PCRE_NEWLINE_ANYCRLF; + endlinetype = EL_ANYCRLF; + } else { fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); @@ -1864,7 +1934,7 @@ hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); if (pattern_list == NULL || hints_list == NULL) { fprintf(stderr, "pcregrep: malloc failed\n"); - return 2; + goto EXIT2; } /* If no patterns were provided by -e, and there is no file provided by -f, @@ -1883,7 +1953,7 @@ for (j = 0; j < cmd_pattern_count; j++) { if (!compile_pattern(patterns[j], pcre_options, NULL, (j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) - return 2; + goto EXIT2; } /* Compile the regular expressions that are provided in a file. */ @@ -1907,7 +1977,7 @@ if (pattern_filename != NULL) { fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, strerror(errno)); - return 2; + goto EXIT2; } filename = pattern_filename; } @@ -1920,7 +1990,7 @@ if (pattern_filename != NULL) linenumber++; if (buffer[0] == 0) continue; /* Skip blank lines */ if (!compile_pattern(buffer, pcre_options, filename, linenumber)) - return 2; + goto EXIT2; } if (f != stdin) fclose(f); @@ -1934,10 +2004,11 @@ for (j = 0; j < pattern_count; j++) if (error != NULL) { char s[16]; - if (pattern_count == 1) s[0] = 0; else snprintf(s, sizeof(s), " number %d", j); + if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); - return 2; + goto EXIT2; } + hint_count++; } /* If there are include or exclude patterns, compile them. */ @@ -1950,7 +2021,7 @@ if (exclude_pattern != NULL) { fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", errptr, error); - return 2; + goto EXIT2; } } @@ -1962,14 +2033,17 @@ if (include_pattern != NULL) { fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", errptr, error); - return 2; + goto EXIT2; } } /* If there are no further arguments, do the business on stdin and exit. */ if (i >= argc) - return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL); + { + rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL); + goto EXIT; + } /* Otherwise, work through the remaining arguments as files or directories. Pass in the fact that there is only one argument at top level - this suppresses @@ -1986,7 +2060,22 @@ for (; i < argc; i++) else if (frc == 0 && rc == 1) rc = 0; } +EXIT: +if (pattern_list != NULL) + { + for (i = 0; i < pattern_count; i++) free(pattern_list[i]); + free(pattern_list); + } +if (hints_list != NULL) + { + for (i = 0; i < hint_count; i++) free(hints_list[i]); + free(hints_list); + } return rc; + +EXIT2: +rc = 2; +goto EXIT; } /* End of pcregrep */ |