Imported Upstream version 1.1~hg20130304 (upstream/1.1_hg20130304)

author: Michael Stapelberg <stapelberg@debian.org> 2013-03-04 21:27:36 +0100
committer: Michael Stapelberg <michael@stapelberg.de> 2013-03-04 21:27:36 +0100
commit: 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree: db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/syntax
parent: 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download: golang-upstream/1.1_hg20130304.tar.gz
3 files changed, 133 insertions, 12 deletions
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go
index 41955bfc2..95f6f1569 100644
--- a/src/pkg/regexp/syntax/compile.go
+++ b/src/pkg/regexp/syntax/compile.go
@@ -10,10 +10,10 @@ import "unicode"
 // Because the pointers haven't been filled in yet, we can reuse their storage
 // to hold the list.  It's kind of sleazy, but works well in practice.
 // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
-// 
+//
 // These aren't really pointers: they're integers, so we can reinterpret them
 // this way without using package unsafe.  A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). 
+// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
 // l == 0 denotes the empty list, okay because we start every program
 // with a fail instruction, so we'll never want to point at its output link.
 type patchList uint32
diff --git a/src/pkg/regexp/syntax/doc.go b/src/pkg/regexp/syntax/doc.go
new file mode 100644
index 000000000..843a6f6a4
--- /dev/null
+++ b/src/pkg/regexp/syntax/doc.go
@@ -0,0 +1,127 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DO NOT EDIT. This file is generated by mksyntaxgo from the RE2 distribution.
+
+/*
+Package syntax parses regular expressions into parse trees and compiles
+parse trees into programs. Most clients of regular expressions will use the
+facilities of package regexp (such as Compile and Match) instead of this package.
+
+Syntax
+
+The regular expression syntax understood by this package when parsing with the Perl flag is as follows.
+Parts of the syntax can be disabled by passing alternate flags to Parse.
+
+
+Single characters:
+  .              any character, possibly including newline (flag s=true)
+  [xyz]          character class
+  [^xyz]         negated character class
+  \d             Perl character class
+  \D             negated Perl character class
+  [:alpha:]      ASCII character class
+  [:^alpha:]     negated ASCII character class
+  \pN            Unicode character class (one-letter name)
+  \p{Greek}      Unicode character class
+  \PN            negated Unicode character class (one-letter name)
+  \P{Greek}      negated Unicode character class
+
+Composites:
+  xy             x followed by y
+  x|y            x or y (prefer x)
+
+Repetitions:
+  x*             zero or more x, prefer more
+  x+             one or more x, prefer more
+  x?             zero or one x, prefer one
+  x{n,m}         n or n+1 or ... or m x, prefer more
+  x{n,}          n or more x, prefer more
+  x{n}           exactly n x
+  x*?            zero or more x, prefer fewer
+  x+?            one or more x, prefer fewer
+  x??            zero or one x, prefer zero
+  x{n,m}?        n or n+1 or ... or m x, prefer fewer
+  x{n,}?         n or more x, prefer fewer
+  x{n}?          exactly n x
+
+Grouping:
+  (re)           numbered capturing group
+  (?P<name>re)   named & numbered capturing group
+  (?:re)         non-capturing group
+  (?flags)       set flags within current group; non-capturing
+  (?flags:re)    set flags during re; non-capturing
+
+  Flag syntax is xyz (set) or -xyz (clear) or xy-z (set xy, clear z). The flags are:
+
+  i              case-insensitive (default false)
+  m              multi-line mode: ^ and $ match begin/end line in addition to begin/end text (default false)
+  s              let . match \n (default false)
+  U              ungreedy: swap meaning of x* and x*?, x+ and x+?, etc (default false)
+
+Empty strings:
+  ^              at beginning of text or line (flag m=true)
+  $              at end of text (like \z not \Z) or line (flag m=true)
+  \A             at beginning of text
+  \b             at word boundary (\w on one side and \W, \A, or \z on the other)
+  \B             not a word boundary
+  \z             at end of text
+
+Escape sequences:
+  \a             bell (== \007)
+  \f             form feed (== \014)
+  \t             horizontal tab (== \011)
+  \n             newline (== \012)
+  \r             carriage return (== \015)
+  \v             vertical tab character (== \013)
+  \*             literal *, for any punctuation character *
+  \123           octal character code (up to three digits)
+  \x7F           hex character code (exactly two digits)
+  \x{10FFFF}     hex character code
+  \Q...\E        literal text ... even if ... has punctuation
+
+Character class elements:
+  x              single character
+  A-Z            character range (inclusive)
+  \d             Perl character class
+  [:foo:]        ASCII character class foo
+  \p{Foo}        Unicode character class Foo
+  \pF            Unicode character class F (one-letter name)
+
+Named character classes as character class elements:
+  [\d]           digits (== \d)
+  [^\d]          not digits (== \D)
+  [\D]           not digits (== \D)
+  [^\D]          not not digits (== \d)
+  [[:name:]]     named ASCII class inside character class (== [:name:])
+  [^[:name:]]    named ASCII class inside negated character class (== [:^name:])
+  [\p{Name}]     named Unicode property inside character class (== \p{Name})
+  [^\p{Name}]    named Unicode property inside negated character class (== \P{Name})
+
+Perl character classes:
+  \d             digits (== [0-9])
+  \D             not digits (== [^0-9])
+  \s             whitespace (== [\t\n\f\r ])
+  \S             not whitespace (== [^\t\n\f\r ])
+  \w             word characters (== [0-9A-Za-z_])
+  \W             not word characters (== [^0-9A-Za-z_])
+
+ASCII character classes:
+  [:alnum:]      alphanumeric (== [0-9A-Za-z])
+  [:alpha:]      alphabetic (== [A-Za-z])
+  [:ascii:]      ASCII (== [\x00-\x7F])
+  [:blank:]      blank (== [\t ])
+  [:cntrl:]      control (== [\x00-\x1F\x7F])
+  [:digit:]      digits (== [0-9])
+  [:graph:]      graphical (== [!-~] == [A-Za-z0-9!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])
+  [:lower:]      lower case (== [a-z])
+  [:print:]      printable (== [ -~] == [ [:graph:]])
+  [:punct:]      punctuation (== [!-/:-@[-`{-~])
+  [:space:]      whitespace (== [\t\n\v\f\r ])
+  [:upper:]      upper case (== [A-Z])
+  [:word:]       word characters (== [0-9A-Za-z_])
+  [:xdigit:]     hex digit (== [0-9A-Fa-f])
+
+*/
+package syntax
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go
index 4924e9453..30e0e8b7f 100644
--- a/src/pkg/regexp/syntax/parse.go
+++ b/src/pkg/regexp/syntax/parse.go
@@ -2,10 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package syntax parses regular expressions into parse trees and compiles
-// parse trees into programs. Most clients of regular expressions will use
-// the facilities of package regexp (such as Compile and Match) instead of
-// this package.
 package syntax
 
 import (
@@ -46,11 +42,9 @@ const (
 	ErrMissingParen          ErrorCode = "missing closing )"
 	ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
 	ErrTrailingBackslash     ErrorCode = "trailing backslash at end of expression"
+	ErrUnexpectedParen       ErrorCode = "unexpected )"
 )
 
-// TODO: Export for Go 1.1.
-const errUnexpectedParen ErrorCode = "unexpected )"
-
 func (e ErrorCode) String() string {
 	return string(e)
 }
@@ -470,7 +464,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
 			// Construct factored form: prefix(suffix1|suffix2|...)
 			prefix := first
 			for j := start; j < i; j++ {
-				reuse := j != start // prefix came from sub[start] 
+				reuse := j != start // prefix came from sub[start]
 				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
 			}
 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
@@ -1171,13 +1165,13 @@ func (p *parser) parseRightParen() error {
 
 	n := len(p.stack)
 	if n < 2 {
-		return &Error{errUnexpectedParen, p.wholeRegexp}
+		return &Error{ErrUnexpectedParen, p.wholeRegexp}
 	}
 	re1 := p.stack[n-1]
 	re2 := p.stack[n-2]
 	p.stack = p.stack[:n-2]
 	if re2.Op != opLeftParen {
-		return &Error{errUnexpectedParen, p.wholeRegexp}
+		return &Error{ErrUnexpectedParen, p.wholeRegexp}
 	}
 	// Restore flags at time of paren.
 	p.flags = re2.Flags
author	Michael Stapelberg <stapelberg@debian.org>	2013-03-04 21:27:36 +0100
committer	Michael Stapelberg <michael@stapelberg.de>	2013-03-04 21:27:36 +0100
commit	04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree	db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/syntax
parent	917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download	golang-upstream/1.1_hg20130304.tar.gz