summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/syntax
diff options
context:
space:
mode:
authorMichael Stapelberg <stapelberg@debian.org>2013-03-04 21:27:36 +0100
committerMichael Stapelberg <michael@stapelberg.de>2013-03-04 21:27:36 +0100
commit04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
treedb247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/syntax
parent917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
downloadgolang-04b08da9af0c450d645ab7389d1467308cfc2db8.tar.gz
Imported Upstream version 1.1~hg20130304upstream/1.1_hg20130304
Diffstat (limited to 'src/pkg/regexp/syntax')
-rw-r--r--src/pkg/regexp/syntax/compile.go4
-rw-r--r--src/pkg/regexp/syntax/doc.go127
-rw-r--r--src/pkg/regexp/syntax/parse.go14
3 files changed, 133 insertions, 12 deletions
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go
index 41955bfc2..95f6f1569 100644
--- a/src/pkg/regexp/syntax/compile.go
+++ b/src/pkg/regexp/syntax/compile.go
@@ -10,10 +10,10 @@ import "unicode"
// Because the pointers haven't been filled in yet, we can reuse their storage
// to hold the list. It's kind of sleazy, but works well in practice.
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
-//
+//
// These aren't really pointers: they're integers, so we can reinterpret them
// this way without using package unsafe. A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
+// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
// l == 0 denotes the empty list, okay because we start every program
// with a fail instruction, so we'll never want to point at its output link.
type patchList uint32
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go
new file mode 100644
index 000000000..843a6f6a4
--- /dev/null
+++ b/src/pkg/regexp/syntax/doc.go
@@ -0,0 +1,127 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
+
+/*
+Package syntax parses regular expressions into parse trees and compiles
+parse trees into programs. Most clients of regular expressions will use the
+facilities of package regexp (such as Compile and Match) instead of this package.
+
+Syntax
+
+The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
+Parts of the syntax can be disabled by passing alternate flags to Parse.
+
+
+Single characters:
+ . any character, possibly including newline (flag s=true)
+ [xyz] character class
+ [^xyz] negated character class
+ \d Perl character class
+ \D negated Perl character class
+ [:alpha:] ASCII character class
+ [:^alpha:] negated ASCII character class
+ \pN Unicode character class (one-letter name)
+ \p{Greek} Unicode character class
+ \PN negated Unicode character class (one-letter name)
+ \P{Greek} negated Unicode character class
+
+Composites:
+ xy x followed by y
+ x|y x or y (prefer x)
+
+Repetitions:
+ x* zero or more x, prefer more
+ x+ one or more x, prefer more
+ x? zero or one x, prefer one
+ x{n,m} n or n+1 or ... or m x, prefer more
+ x{n,} n or more x, prefer more
+ x{n} exactly n x
+ x*? zero or more x, prefer fewer
+ x+? one or more x, prefer fewer
+ x?? zero or one x, prefer zero
+ x{n,m}? n or n+1 or ... or m x, prefer fewer
+ x{n,}? n or more x, prefer fewer
+ x{n}? exactly n x
+
+Grouping:
+ (re) numbered capturing group
+ (?P<name>re) named & numbered capturing group
+ (?:re) non-capturing group
+ (?flags) set flags within current group; non-capturing
+ (?flags:re) set flags during re; non-capturing
+
+ Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:
+
+ i case-insensitive (default false)
+ m multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
+ s let . match \n (default false)
+ U ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false)
+
+Empty strings:
+ ^ at beginning of text or line (flag m=true)
+ $ at end of text (like \z not \Z) or line (flag m=true)
+ \A at beginning of text
+ \b at word boundary (\w on one side and \W, \A, or \z on the other)
+ \B not a word boundary
+ \z at end of text
+
+Escape sequences:
+ \a bell (== \007)
+ \f form feed (== \014)
+ \t horizontal tab (== \011)
+ \n newline (== \012)
+ \r carriage return (== \015)
+ \v vertical tab character (== \013)
+ \* literal *, for any punctuation character *
+ \123 octal character code (up to three digits)
+ \x7F hex character code (exactly two digits)
+ \x{10FFFF} hex character code
+ \Q...\E literal text ... even if ... has punctuation
+
+Character class elements:
+ x single character
+ A-Z character range (inclusive)
+ \d Perl character class
+ [:foo:] ASCII character class foo
+ \p{Foo} Unicode character class Foo
+ \pF Unicode character class F (one-letter name)
+
+Named character classes as character class elements:
+ [\d] digits (== \d)
+ [^\d] not digits (== \D)
+ [\D] not digits (== \D)
+ [^\D] not not digits (== \d)
+ [[:name:]] named ASCII class inside character class (== [:name:])
+ [^[:name:]] named ASCII class inside negated character class (== [:^name:])
+ [\p{Name}] named Unicode property inside character class (== \p{Name})
+ [^\p{Name}] named Unicode property inside negated character class (== \P{Name})
+
+Perl character classes:
+ \d digits (== [0-9])
+ \D not digits (== [^0-9])
+ \s whitespace (== [\t\n\f\r ])
+ \S not whitespace (== [^\t\n\f\r ])
+ \w word characters (== [0-9A-Za-z_])
+ \W not word characters (== [^0-9A-Za-z_])
+
+ASCII character classes:
+ [:alnum:] alphanumeric (== [0-9A-Za-z])
+ [:alpha:] alphabetic (== [A-Za-z])
+ [:ascii:] ASCII (== [\x00-\x7F])
+ [:blank:] blank (== [\t ])
+ [:cntrl:] control (== [\x00-\x1F\x7F])
+ [:digit:] digits (== [0-9])
+ [:graph:] graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
+ [:lower:] lower case (== [a-z])
+ [:print:] printable (== [ -~] == [ [:graph:]])
+ [:punct:] punctuation (== [!-/:-@[-`{-~])
+ [:space:] whitespace (== [\t\n\v\f\r ])
+ [:upper:] upper case (== [A-Z])
+ [:word:] word characters (== [0-9A-Za-z_])
+ [:xdigit:] hex digit (== [0-9A-Fa-f])
+
+*/
+package syntax
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go
index 4924e9453..30e0e8b7f 100644
--- a/src/pkg/regexp/syntax/parse.go
+++ b/src/pkg/regexp/syntax/parse.go
@@ -2,10 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package syntax parses regular expressions into parse trees and compiles
-// parse trees into programs. Most clients of regular expressions will use
-// the facilities of package regexp (such as Compile and Match) instead of
-// this package.
package syntax
import (
@@ -46,11 +42,9 @@ const (
ErrMissingParen ErrorCode = "missing closing )"
ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
+ ErrUnexpectedParen ErrorCode = "unexpected )"
)
-// TODO: Export for Go 1.1.
-const errUnexpectedParen ErrorCode = "unexpected )"
-
func (e ErrorCode) String() string {
return string(e)
}
@@ -470,7 +464,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
// Construct factored form: prefix(suffix1|suffix2|...)
prefix := first
for j := start; j < i; j++ {
- reuse := j != start // prefix came from sub[start]
+ reuse := j != start // prefix came from sub[start]
sub[j] = p.removeLeadingRegexp(sub[j], reuse)
}
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
@@ -1171,13 +1165,13 @@ func (p *parser) parseRightParen() error {
n := len(p.stack)
if n < 2 {
- return &Error{errUnexpectedParen, p.wholeRegexp}
+ return &Error{ErrUnexpectedParen, p.wholeRegexp}
}
re1 := p.stack[n-1]
re2 := p.stack[n-2]
p.stack = p.stack[:n-2]
if re2.Op != opLeftParen {
- return &Error{errUnexpectedParen, p.wholeRegexp}
+ return &Error{ErrUnexpectedParen, p.wholeRegexp}
}
// Restore flags at time of paren.
p.flags = re2.Flags