diff options
author | Michael Stapelberg <stapelberg@debian.org> | 2013-03-04 21:27:36 +0100 |
---|---|---|
committer | Michael Stapelberg <michael@stapelberg.de> | 2013-03-04 21:27:36 +0100 |
commit | 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch) | |
tree | db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/syntax | |
parent | 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff) | |
download | golang-upstream/1.1_hg20130304.tar.gz |
Imported Upstream version 1.1~hg20130304upstream/1.1_hg20130304
Diffstat (limited to 'src/pkg/regexp/syntax')
-rw-r--r-- | src/pkg/regexp/syntax/compile.go | 4 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/doc.go | 127 | ||||
-rw-r--r-- | src/pkg/regexp/syntax/parse.go | 14 |
3 files changed, 133 insertions, 12 deletions
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go index 41955bfc2..95f6f1569 100644 --- a/src/pkg/regexp/syntax/compile.go +++ b/src/pkg/regexp/syntax/compile.go @@ -10,10 +10,10 @@ import "unicode" // Because the pointers haven't been filled in yet, we can reuse their storage // to hold the list. It's kind of sleazy, but works well in practice. // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. -// +// // These aren't really pointers: they're integers, so we can reinterpret them // this way without using package unsafe. A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). +// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). // l == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. type patchList uint32 diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go new file mode 100644 index 000000000..843a6f6a4 --- /dev/null +++ b/src/pkg/regexp/syntax/doc.go @@ -0,0 +1,127 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution. + +/* +Package syntax parses regular expressions into parse trees and compiles +parse trees into programs. Most clients of regular expressions will use the +facilities of package regexp (such as Compile and Match) instead of this package. + +Syntax + +The regular expression syntax understood by this package when parsing with the Perl flag is as follows. +Parts of the syntax can be disabled by passing alternate flags to Parse. + + +Single characters: + . any character, possibly including newline (flag s=true) + [xyz] character class + [^xyz] negated character class + \d Perl character class + \D negated Perl character class + [:alpha:] ASCII character class + [:^alpha:] negated ASCII character class + \pN Unicode character class (one-letter name) + \p{Greek} Unicode character class + \PN negated Unicode character class (one-letter name) + \P{Greek} negated Unicode character class + +Composites: + xy x followed by y + x|y x or y (prefer x) + +Repetitions: + x* zero or more x, prefer more + x+ one or more x, prefer more + x? zero or one x, prefer one + x{n,m} n or n+1 or ... or m x, prefer more + x{n,} n or more x, prefer more + x{n} exactly n x + x*? zero or more x, prefer fewer + x+? one or more x, prefer fewer + x?? zero or one x, prefer zero + x{n,m}? n or n+1 or ... or m x, prefer fewer + x{n,}? n or more x, prefer fewer + x{n}? exactly n x + +Grouping: + (re) numbered capturing group + (?P<name>re) named & numbered capturing group + (?:re) non-capturing group + (?flags) set flags within current group; non-capturing + (?flags:re) set flags during re; non-capturing + + Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are: + + i case-insensitive (default false) + m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false) + s let . match \n (default false) + U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false) + +Empty strings: + ^ at beginning of text or line (flag m=true) + $ at end of text (like \z not \Z) or line (flag m=true) + \A at beginning of text + \b at word boundary (\w on one side and \W, \A, or \z on the other) + \B not a word boundary + \z at end of text + +Escape sequences: + \a bell (== \007) + \f form feed (== \014) + \t horizontal tab (== \011) + \n newline (== \012) + \r carriage return (== \015) + \v vertical tab character (== \013) + \* literal *, for any punctuation character * + \123 octal character code (up to three digits) + \x7F hex character code (exactly two digits) + \x{10FFFF} hex character code + \Q...\E literal text ... even if ... has punctuation + +Character class elements: + x single character + A-Z character range (inclusive) + \d Perl character class + [:foo:] ASCII character class foo + \p{Foo} Unicode character class Foo + \pF Unicode character class F (one-letter name) + +Named character classes as character class elements: + [\d] digits (== \d) + [^\d] not digits (== \D) + [\D] not digits (== \D) + [^\D] not not digits (== \d) + [[:name:]] named ASCII class inside character class (== [:name:]) + [^[:name:]] named ASCII class inside negated character class (== [:^name:]) + [\p{Name}] named Unicode property inside character class (== \p{Name}) + [^\p{Name}] named Unicode property inside negated character class (== \P{Name}) + +Perl character classes: + \d digits (== [0-9]) + \D not digits (== [^0-9]) + \s whitespace (== [\t\n\f\r ]) + \S not whitespace (== [^\t\n\f\r ]) + \w word characters (== [0-9A-Za-z_]) + \W not word characters (== [^0-9A-Za-z_]) + +ASCII character classes: + [:alnum:] alphanumeric (== [0-9A-Za-z]) + [:alpha:] alphabetic (== [A-Za-z]) + [:ascii:] ASCII (== [\x00-\x7F]) + [:blank:] blank (== [\t ]) + [:cntrl:] control (== [\x00-\x1F\x7F]) + [:digit:] digits (== [0-9]) + [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]) + [:lower:] lower case (== [a-z]) + [:print:] printable (== [ -~] == [ [:graph:]]) + [:punct:] punctuation (== [!-/:-@[-`{-~]) + [:space:] whitespace (== [\t\n\v\f\r ]) + [:upper:] upper case (== [A-Z]) + [:word:] word characters (== [0-9A-Za-z_]) + [:xdigit:] hex digit (== [0-9A-Fa-f]) + +*/ +package syntax diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go index 4924e9453..30e0e8b7f 100644 --- a/src/pkg/regexp/syntax/parse.go +++ b/src/pkg/regexp/syntax/parse.go @@ -2,10 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package syntax parses regular expressions into parse trees and compiles -// parse trees into programs. Most clients of regular expressions will use -// the facilities of package regexp (such as Compile and Match) instead of -// this package. package syntax import ( @@ -46,11 +42,9 @@ const ( ErrMissingParen ErrorCode = "missing closing )" ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator" ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression" + ErrUnexpectedParen ErrorCode = "unexpected )" ) -// TODO: Export for Go 1.1. -const errUnexpectedParen ErrorCode = "unexpected )" - func (e ErrorCode) String() string { return string(e) } @@ -470,7 +464,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { // Construct factored form: prefix(suffix1|suffix2|...) prefix := first for j := start; j < i; j++ { - reuse := j != start // prefix came from sub[start] + reuse := j != start // prefix came from sub[start] sub[j] = p.removeLeadingRegexp(sub[j], reuse) } suffix := p.collapse(sub[start:i], OpAlternate) // recurse @@ -1171,13 +1165,13 @@ func (p *parser) parseRightParen() error { n := len(p.stack) if n < 2 { - return &Error{errUnexpectedParen, p.wholeRegexp} + return &Error{ErrUnexpectedParen, p.wholeRegexp} } re1 := p.stack[n-1] re2 := p.stack[n-2] p.stack = p.stack[:n-2] if re2.Op != opLeftParen { - return &Error{errUnexpectedParen, p.wholeRegexp} + return &Error{ErrUnexpectedParen, p.wholeRegexp} } // Restore flags at time of paren. p.flags = re2.Flags |