From d5b3aa68ecdd12c7a45b2d640ea453937a234873 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 24 Feb 2010 16:11:14 -0800 Subject: path: add Match R=eridius, r, rog CC=golang-dev http://codereview.appspot.com/217088 --- src/pkg/path/Makefile | 1 + src/pkg/path/match.go | 197 +++++++++++++++++++++++++++++++++++++++++++++ src/pkg/path/match_test.go | 76 +++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 src/pkg/path/match.go create mode 100644 src/pkg/path/match_test.go (limited to 'src') diff --git a/src/pkg/path/Makefile b/src/pkg/path/Makefile index 199b68008..9372cdf37 100644 --- a/src/pkg/path/Makefile +++ b/src/pkg/path/Makefile @@ -6,6 +6,7 @@ include ../../Make.$(GOARCH) TARG=path GOFILES=\ + match.go\ path.go\ include ../../Make.pkg diff --git a/src/pkg/path/match.go b/src/pkg/path/match.go new file mode 100644 index 000000000..4e42b6a10 --- /dev/null +++ b/src/pkg/path/match.go @@ -0,0 +1,197 @@ +package path + +import ( + "os" + "strings" + "utf8" +) + +var ErrBadPattern = os.NewError("syntax error in pattern") + +// Match returns true if name matches the shell file name pattern. +// The syntax used by pattern is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence of non-/ characters +// '?' matches any single non-/ character +// '[' [ '^' ] { character-range } ']' +// character class (must be non-empty) +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// Match requires pattern to match all of name, not just a substring. +// The only possible error return is when pattern is malformed. +// +func Match(pattern, name string) (matched bool, err os.Error) { +Pattern: + for len(pattern) > 0 { + var star bool + var chunk string + star, chunk, pattern = scanChunk(pattern) + if star && chunk == "" { + // Trailing * matches rest of string unless it has a /. + return strings.Index(name, "/") < 0, nil + } + // Look for match at current position. + t, ok, err := matchChunk(chunk, name) + if ok { + name = t + continue + } + if err != nil { + return false, err + } + if star { + // Look for match skipping i+1 bytes. + // Cannot skip /. + for i := 0; i < len(name) && name[i] != '/'; i++ { + t, ok, err := matchChunk(chunk, name[i+1:]) + if ok { + name = t + continue Pattern + } + if err != nil { + return false, err + } + } + } + return false, nil + } + return len(name) == 0, nil +} + +// scanChunk gets the next section of pattern, which is a non-star string +// possibly preceded by a star. +func scanChunk(pattern string) (star bool, chunk, rest string) { + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + star = true + } + inrange := false + var i int +Scan: + for i = 0; i < len(pattern); i++ { + switch pattern[i] { + case '\\': + // error check handled in matchChunk: bad pattern. + if i+1 < len(pattern) { + i++ + } + continue + case '[': + inrange = true + case ']': + inrange = false + case '*': + if !inrange { + break Scan + } + } + } + return star, pattern[0:i], pattern[i:] +} + +// matchChunk checks whether chunk matches the beginning of s. +// If so, it returns the remainder of s (after the match). +// Chunk is all single-character operators: literals, char classes, and ?. +func matchChunk(chunk, s string) (rest string, ok bool, err os.Error) { + for len(chunk) > 0 { + if len(s) == 0 { + return + } + switch chunk[0] { + case '[': + // character class + r, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + // possibly negated + notNegated := true + if len(chunk) > 0 && chunk[0] == '^' { + notNegated = false + chunk = chunk[1:] + } + // parse all ranges + match := false + nrange := 0 + for { + if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { + chunk = chunk[1:] + break + } + var lo, hi int + if lo, chunk, err = getEsc(chunk); err != nil { + return + } + hi = lo + if chunk[0] == '-' { + if hi, chunk, err = getEsc(chunk[1:]); err != nil { + return + } + } + if lo <= r && r <= hi { + match = true + } + nrange++ + } + if match != notNegated { + return + } + + case '?': + if s[0] == '/' { + return + } + _, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + + case '\\': + chunk = chunk[1:] + if len(chunk) == 0 { + err = ErrBadPattern + return + } + fallthrough + + default: + if chunk[0] != s[0] { + return + } + s = s[1:] + chunk = chunk[1:] + } + } + return s, true, nil +} + +// getEsc gets a possibly-escaped character from chunk, for a character class. +func getEsc(chunk string) (r int, nchunk string, err os.Error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = ErrBadPattern + return + } + if chunk[0] == '\\' { + chunk = chunk[1:] + if len(chunk) == 0 { + err = ErrBadPattern + return + } + } + r, n := utf8.DecodeRuneInString(chunk) + if r == utf8.RuneError && n == 1 { + err = ErrBadPattern + } + nchunk = chunk[n:] + if len(nchunk) == 0 { + err = ErrBadPattern + } + return +} diff --git a/src/pkg/path/match_test.go b/src/pkg/path/match_test.go new file mode 100644 index 000000000..d3cd088f1 --- /dev/null +++ b/src/pkg/path/match_test.go @@ -0,0 +1,76 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package path + +import ( + "os" + "testing" +) + +type MatchTest struct { + pattern, s string + match bool + err os.Error +} + +var matchTests = []MatchTest{ + MatchTest{"abc", "abc", true, nil}, + MatchTest{"*", "abc", true, nil}, + MatchTest{"*c", "abc", true, nil}, + MatchTest{"a*", "a", true, nil}, + MatchTest{"a*", "abc", true, nil}, + MatchTest{"a*", "ab/c", false, nil}, + MatchTest{"a*/b", "abc/b", true, nil}, + MatchTest{"a*/b", "a/c/b", false, nil}, + MatchTest{"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil}, + MatchTest{"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil}, + MatchTest{"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil}, + MatchTest{"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil}, + MatchTest{"a*b?c*x", "abxbbxdbxebxczzx", true, nil}, + MatchTest{"a*b?c*x", "abxbbxdbxebxczzy", false, nil}, + MatchTest{"ab[c]", "abc", true, nil}, + MatchTest{"ab[b-d]", "abc", true, nil}, + MatchTest{"ab[e-g]", "abc", false, nil}, + MatchTest{"ab[^c]", "abc", false, nil}, + MatchTest{"ab[^b-d]", "abc", false, nil}, + MatchTest{"ab[^e-g]", "abc", true, nil}, + MatchTest{"a\\*b", "a*b", true, nil}, + MatchTest{"a\\*b", "ab", false, nil}, + MatchTest{"a?b", "a☺b", true, nil}, + MatchTest{"a[^a]b", "a☺b", true, nil}, + MatchTest{"a???b", "a☺b", false, nil}, + MatchTest{"a[^a][^a][^a]b", "a☺b", false, nil}, + MatchTest{"[a-ζ]*", "α", true, nil}, + MatchTest{"*[a-ζ]", "A", false, nil}, + MatchTest{"a?b", "a/b", false, nil}, + MatchTest{"a*b", "a/b", false, nil}, + MatchTest{"[\\]a]", "]", true, nil}, + MatchTest{"[\\-]", "-", true, nil}, + MatchTest{"[x\\-]", "x", true, nil}, + MatchTest{"[x\\-]", "-", true, nil}, + MatchTest{"[x\\-]", "z", false, nil}, + MatchTest{"[\\-x]", "x", true, nil}, + MatchTest{"[\\-x]", "-", true, nil}, + MatchTest{"[\\-x]", "a", false, nil}, + MatchTest{"[]a]", "]", false, ErrBadPattern}, + MatchTest{"[-]", "-", false, ErrBadPattern}, + MatchTest{"[x-]", "x", false, ErrBadPattern}, + MatchTest{"[x-]", "-", false, ErrBadPattern}, + MatchTest{"[x-]", "z", false, ErrBadPattern}, + MatchTest{"[-x]", "x", false, ErrBadPattern}, + MatchTest{"[-x]", "-", false, ErrBadPattern}, + MatchTest{"[-x]", "a", false, ErrBadPattern}, + MatchTest{"\\", "a", false, ErrBadPattern}, + MatchTest{"[a-b-c]", "a", false, ErrBadPattern}, +} + +func TestMatch(t *testing.T) { + for _, tt := range matchTests { + ok, err := Match(tt.pattern, tt.s) + if ok != tt.match || err != tt.err { + t.Errorf("Match(%#q, %#q) = %v, %v want %v, nil\n", tt.pattern, tt.s, ok, err, tt.match) + } + } +} -- cgit v1.2.3