diff options
| author | Ondřej Surý <ondrej@sury.org> | 2012-04-06 15:14:11 +0200 | 
|---|---|---|
| committer | Ondřej Surý <ondrej@sury.org> | 2012-04-06 15:14:11 +0200 | 
| commit | 505c19580e0f43fe5224431459cacb7c21edd93d (patch) | |
| tree | 79e2634c253d60afc0cc0b2f510dc7dcbb48497b /src/pkg/regexp/syntax/simplify_test.go | |
| parent | 1336a7c91e596c423a49d1194ea42d98bca0d958 (diff) | |
| download | golang-505c19580e0f43fe5224431459cacb7c21edd93d.tar.gz | |
Imported Upstream version 1 (ref: upstream/1)
Diffstat (limited to 'src/pkg/regexp/syntax/simplify_test.go')
| -rw-r--r-- | src/pkg/regexp/syntax/simplify_test.go | 151 | 
1 files changed, 151 insertions, 0 deletions
| diff --git a/src/pkg/regexp/syntax/simplify_test.go b/src/pkg/regexp/syntax/simplify_test.go new file mode 100644 index 000000000..879eff5be --- /dev/null +++ b/src/pkg/regexp/syntax/simplify_test.go @@ -0,0 +1,151 @@ +// Copyright 2011 The Go Authors.  All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import "testing" + +var simplifyTests = []struct { +	Regexp string +	Simple string +}{ +	// Already-simple constructs +	{`a`, `a`}, +	{`ab`, `ab`}, +	{`a|b`, `[a-b]`}, +	{`ab|cd`, `ab|cd`}, +	{`(ab)*`, `(ab)*`}, +	{`(ab)+`, `(ab)+`}, +	{`(ab)?`, `(ab)?`}, +	{`.`, `(?s:.)`}, +	{`^`, `^`}, +	{`$`, `$`}, +	{`[ac]`, `[ac]`}, +	{`[^ac]`, `[^ac]`}, + +	// Posix character classes +	{`[[:alnum:]]`, `[0-9A-Za-z]`}, +	{`[[:alpha:]]`, `[A-Za-z]`}, +	{`[[:blank:]]`, `[\t ]`}, +	{`[[:cntrl:]]`, `[\x00-\x1f\x7f]`}, +	{`[[:digit:]]`, `[0-9]`}, +	{`[[:graph:]]`, `[!-~]`}, +	{`[[:lower:]]`, `[a-z]`}, +	{`[[:print:]]`, `[ -~]`}, +	{`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"}, +	{`[[:space:]]`, `[\t-\r ]`}, +	{`[[:upper:]]`, `[A-Z]`}, +	{`[[:xdigit:]]`, `[0-9A-Fa-f]`}, + +	// Perl character classes +	{`\d`, `[0-9]`}, +	{`\s`, `[\t-\n\f-\r ]`}, +	{`\w`, `[0-9A-Z_a-z]`}, +	{`\D`, `[^0-9]`}, +	{`\S`, `[^\t-\n\f-\r ]`}, +	{`\W`, `[^0-9A-Z_a-z]`}, +	{`[\d]`, `[0-9]`}, +	{`[\s]`, `[\t-\n\f-\r ]`}, +	{`[\w]`, `[0-9A-Z_a-z]`}, +	{`[\D]`, `[^0-9]`}, +	{`[\S]`, `[^\t-\n\f-\r ]`}, +	{`[\W]`, `[^0-9A-Z_a-z]`}, + +	// Posix repetitions +	{`a{1}`, `a`}, +	{`a{2}`, `aa`}, +	{`a{5}`, `aaaaa`}, +	{`a{0,1}`, `a?`}, +	// The next three are illegible because Simplify inserts (?:) +	// parens instead of () parens to avoid creating extra +	// captured subexpressions.  The comments show a version with fewer parens. +	{`(a){0,2}`, `(?:(a)(a)?)?`},                       //       (aa?)? +	{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},       //   (a(a(aa?)?)?)? 
+	{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? +	{`a{0,2}`, `(?:aa?)?`},                             //       (aa?)? +	{`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`},                 //   (a(a(aa?)?)?)? +	{`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`},               // aa(a(a(aa?)?)?)? +	{`a{0,}`, `a*`}, +	{`a{1,}`, `a+`}, +	{`a{2,}`, `aa+`}, +	{`a{5,}`, `aaaaa+`}, + +	// Test that operators simplify their arguments. +	{`(?:a{1,}){1,}`, `a+`}, +	{`(a{1,}b{1,})`, `(a+b+)`}, +	{`a{1,}|b{1,}`, `a+|b+`}, +	{`(?:a{1,})*`, `(?:a+)*`}, +	{`(?:a{1,})+`, `a+`}, +	{`(?:a{1,})?`, `(?:a+)?`}, +	{``, `(?:)`}, +	{`a{0}`, `(?:)`}, + +	// Character class simplification +	{`[ab]`, `[a-b]`}, +	{`[a-za-za-z]`, `[a-z]`}, +	{`[A-Za-zA-Za-z]`, `[A-Za-z]`}, +	{`[ABCDEFGH]`, `[A-H]`}, +	{`[AB-CD-EF-GH]`, `[A-H]`}, +	{`[W-ZP-XE-R]`, `[E-Z]`}, +	{`[a-ee-gg-m]`, `[a-m]`}, +	{`[a-ea-ha-m]`, `[a-m]`}, +	{`[a-ma-ha-e]`, `[a-m]`}, +	{`[a-zA-Z0-9 -~]`, `[ -~]`}, + +	// Empty character classes +	{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`}, + +	// Full character classes +	{`[[:cntrl:][:^cntrl:]]`, `(?s:.)`}, + +	// Unicode case folding. +	{`(?i)A`, `(?i:A)`}, +	{`(?i)a`, `(?i:A)`}, +	{`(?i)[A]`, `(?i:A)`}, +	{`(?i)[a]`, `(?i:A)`}, +	{`(?i)K`, `(?i:K)`}, +	{`(?i)k`, `(?i:K)`}, +	{`(?i)\x{212a}`, "(?i:K)"}, +	{`(?i)[K]`, "[Kk\u212A]"}, +	{`(?i)[k]`, "[Kk\u212A]"}, +	{`(?i)[\x{212a}]`, "[Kk\u212A]"}, +	{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"}, +	{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"}, +	{`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`}, + +	// Empty string as a regular expression. +	// The empty string must be preserved inside parens in order +	// to make submatches work right, so these tests are less +	// interesting than they might otherwise be.  String inserts +	// explicit (?:) in place of non-parenthesized empty strings, +	// to make them easier to spot for other parsers. 
+	{`(a|b|)`, `([a-b]|(?:))`}, +	{`(|)`, `()`}, +	{`a()`, `a()`}, +	{`(()|())`, `(()|())`}, +	{`(a|)`, `(a|(?:))`}, +	{`ab()cd()`, `ab()cd()`}, +	{`()`, `()`}, +	{`()*`, `()*`}, +	{`()+`, `()+`}, +	{`()?`, `()?`}, +	{`(){0}`, `(?:)`}, +	{`(){1}`, `()`}, +	{`(){1,}`, `()+`}, +	{`(){0,2}`, `(?:()()?)?`}, +} + +func TestSimplify(t *testing.T) { +	for _, tt := range simplifyTests { +		re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) +		if err != nil { +			t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) +			continue +		} +		s := re.Simplify().String() +		if s != tt.Simple { +			t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) +		} +	} +} | 
