Diffstat (limited to 'src/pkg/strings')
-rw-r--r-- | src/pkg/strings/example_test.go |  225
-rw-r--r-- | src/pkg/strings/export_test.go  |   45
-rw-r--r-- | src/pkg/strings/reader.go       |  144
-rw-r--r-- | src/pkg/strings/reader_test.go  |  159
-rw-r--r-- | src/pkg/strings/replace.go      |  549
-rw-r--r-- | src/pkg/strings/replace_test.go |  506
-rw-r--r-- | src/pkg/strings/search.go       |  124
-rw-r--r-- | src/pkg/strings/search_test.go  |   90
-rw-r--r-- | src/pkg/strings/strings.go      |  725
-rw-r--r-- | src/pkg/strings/strings.s       |    5
-rw-r--r-- | src/pkg/strings/strings_decl.go |    8
-rw-r--r-- | src/pkg/strings/strings_test.go | 1176
12 files changed, 0 insertions, 3756 deletions
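For orientation only (this sketch is not part of the change itself), here is a minimal example of the public API defined in the files removed below: strings.NewReplacer from replace.go and strings.NewReader from reader.go. It assumes the strings package remains importable under the same import path after this deletion, which this diff alone does not show, and it borrows the HTML-escaping replacement pairs used by replace_test.go.

    package main

    import (
        "fmt"
        "io"
        "os"
        "strings"
    )

    func main() {
        // Replacer: the HTML-escaping case exercised in replace_test.go.
        r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
        fmt.Println(r.Replace("This is <b>HTML</b>!")) // This is &lt;b&gt;HTML&lt;/b&gt;!

        // Reader: a read-only io.Reader/io.Seeker over a string (reader.go).
        sr := strings.NewReader("0123456789")
        if _, err := sr.Seek(5, os.SEEK_SET); err != nil {
            panic(err)
        }
        // WriteTo (via io.Copy) drains the unread portion: prints "56789".
        if _, err := io.Copy(os.Stdout, sr); err != nil {
            panic(err)
        }
        fmt.Println()
    }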
diff --git a/src/pkg/strings/example_test.go b/src/pkg/strings/example_test.go deleted file mode 100644 index 7243e16b1..000000000 --- a/src/pkg/strings/example_test.go +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings_test - -import ( - "fmt" - "strings" - "unicode" -) - -func ExampleFields() { - fmt.Printf("Fields are: %q", strings.Fields(" foo bar baz ")) - // Output: Fields are: ["foo" "bar" "baz"] -} - -func ExampleFieldsFunc() { - f := func(c rune) bool { - return !unicode.IsLetter(c) && !unicode.IsNumber(c) - } - fmt.Printf("Fields are: %q", strings.FieldsFunc(" foo1;bar2,baz3...", f)) - // Output: Fields are: ["foo1" "bar2" "baz3"] -} - -func ExampleContains() { - fmt.Println(strings.Contains("seafood", "foo")) - fmt.Println(strings.Contains("seafood", "bar")) - fmt.Println(strings.Contains("seafood", "")) - fmt.Println(strings.Contains("", "")) - // Output: - // true - // false - // true - // true -} - -func ExampleContainsAny() { - fmt.Println(strings.ContainsAny("team", "i")) - fmt.Println(strings.ContainsAny("failure", "u & i")) - fmt.Println(strings.ContainsAny("foo", "")) - fmt.Println(strings.ContainsAny("", "")) - // Output: - // false - // true - // false - // false -} - -func ExampleCount() { - fmt.Println(strings.Count("cheese", "e")) - fmt.Println(strings.Count("five", "")) // before & after each rune - // Output: - // 3 - // 5 -} - -func ExampleEqualFold() { - fmt.Println(strings.EqualFold("Go", "go")) - // Output: true -} - -func ExampleIndex() { - fmt.Println(strings.Index("chicken", "ken")) - fmt.Println(strings.Index("chicken", "dmr")) - // Output: - // 4 - // -1 -} - -func ExampleIndexFunc() { - f := func(c rune) bool { - return unicode.Is(unicode.Han, c) - } - fmt.Println(strings.IndexFunc("Hello, 世界", f)) - fmt.Println(strings.IndexFunc("Hello, world", f)) - // Output: - // 7 - // -1 -} - -func ExampleIndexAny() { - fmt.Println(strings.IndexAny("chicken", "aeiouy")) - fmt.Println(strings.IndexAny("crwth", "aeiouy")) - // Output: - // 2 - // -1 -} - -func ExampleIndexRune() { - fmt.Println(strings.IndexRune("chicken", 'k')) - fmt.Println(strings.IndexRune("chicken", 'd')) - // Output: - // 4 - // -1 -} - -func ExampleLastIndex() { - fmt.Println(strings.Index("go gopher", "go")) - fmt.Println(strings.LastIndex("go gopher", "go")) - fmt.Println(strings.LastIndex("go gopher", "rodent")) - // Output: - // 0 - // 3 - // -1 -} - -func ExampleJoin() { - s := []string{"foo", "bar", "baz"} - fmt.Println(strings.Join(s, ", ")) - // Output: foo, bar, baz -} - -func ExampleRepeat() { - fmt.Println("ba" + strings.Repeat("na", 2)) - // Output: banana -} - -func ExampleReplace() { - fmt.Println(strings.Replace("oink oink oink", "k", "ky", 2)) - fmt.Println(strings.Replace("oink oink oink", "oink", "moo", -1)) - // Output: - // oinky oinky oink - // moo moo moo -} - -func ExampleSplit() { - fmt.Printf("%q\n", strings.Split("a,b,c", ",")) - fmt.Printf("%q\n", strings.Split("a man a plan a canal panama", "a ")) - fmt.Printf("%q\n", strings.Split(" xyz ", "")) - fmt.Printf("%q\n", strings.Split("", "Bernardo O'Higgins")) - // Output: - // ["a" "b" "c"] - // ["" "man " "plan " "canal panama"] - // [" " "x" "y" "z" " "] - // [""] -} - -func ExampleSplitN() { - fmt.Printf("%q\n", strings.SplitN("a,b,c", ",", 2)) - z := strings.SplitN("a,b,c", ",", 0) - fmt.Printf("%q (nil = %v)\n", z, z == nil) - // Output: - // ["a" "b,c"] 
- // [] (nil = true) -} - -func ExampleSplitAfter() { - fmt.Printf("%q\n", strings.SplitAfter("a,b,c", ",")) - // Output: ["a," "b," "c"] -} - -func ExampleSplitAfterN() { - fmt.Printf("%q\n", strings.SplitAfterN("a,b,c", ",", 2)) - // Output: ["a," "b,c"] -} - -func ExampleTitle() { - fmt.Println(strings.Title("her royal highness")) - // Output: Her Royal Highness -} - -func ExampleToTitle() { - fmt.Println(strings.ToTitle("loud noises")) - fmt.Println(strings.ToTitle("хлеб")) - // Output: - // LOUD NOISES - // ХЛЕБ -} - -func ExampleTrim() { - fmt.Printf("[%q]", strings.Trim(" !!! Achtung! Achtung! !!! ", "! ")) - // Output: ["Achtung! Achtung"] -} - -func ExampleMap() { - rot13 := func(r rune) rune { - switch { - case r >= 'A' && r <= 'Z': - return 'A' + (r-'A'+13)%26 - case r >= 'a' && r <= 'z': - return 'a' + (r-'a'+13)%26 - } - return r - } - fmt.Println(strings.Map(rot13, "'Twas brillig and the slithy gopher...")) - // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure... -} - -func ExampleTrimSpace() { - fmt.Println(strings.TrimSpace(" \t\n a lone gopher \n\t\r\n")) - // Output: a lone gopher -} - -func ExampleNewReplacer() { - r := strings.NewReplacer("<", "<", ">", ">") - fmt.Println(r.Replace("This is <b>HTML</b>!")) - // Output: This is <b>HTML</b>! -} - -func ExampleToUpper() { - fmt.Println(strings.ToUpper("Gopher")) - // Output: GOPHER -} - -func ExampleToLower() { - fmt.Println(strings.ToLower("Gopher")) - // Output: gopher -} - -func ExampleTrimSuffix() { - var s = "Hello, goodbye, etc!" - s = strings.TrimSuffix(s, "goodbye, etc!") - s = strings.TrimSuffix(s, "planet") - fmt.Print(s, "world!") - // Output: Hello, world! -} - -func ExampleTrimPrefix() { - var s = "Goodbye,, world!" - s = strings.TrimPrefix(s, "Goodbye,") - s = strings.TrimPrefix(s, "Howdy,") - fmt.Print("Hello" + s) - // Output: Hello, world! -} diff --git a/src/pkg/strings/export_test.go b/src/pkg/strings/export_test.go deleted file mode 100644 index 17c806aa5..000000000 --- a/src/pkg/strings/export_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings - -func (r *Replacer) Replacer() interface{} { - return r.r -} - -func (r *Replacer) PrintTrie() string { - gen := r.r.(*genericReplacer) - return gen.printNode(&gen.root, 0) -} - -func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) { - if t.priority > 0 { - s += "+" - } else { - s += "-" - } - s += "\n" - - if t.prefix != "" { - s += Repeat(".", depth) + t.prefix - s += r.printNode(t.next, depth+len(t.prefix)) - } else if t.table != nil { - for b, m := range r.mapping { - if int(m) != r.tableSize && t.table[m] != nil { - s += Repeat(".", depth) + string([]byte{byte(b)}) - s += r.printNode(t.table[m], depth+1) - } - } - } - return -} - -func StringFind(pattern, text string) int { - return makeStringFinder(pattern).next(text) -} - -func DumpTables(pattern string) ([]int, []int) { - finder := makeStringFinder(pattern) - return finder.badCharSkip[:], finder.goodSuffixSkip -} diff --git a/src/pkg/strings/reader.go b/src/pkg/strings/reader.go deleted file mode 100644 index 82df97439..000000000 --- a/src/pkg/strings/reader.go +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package strings - -import ( - "errors" - "io" - "unicode/utf8" -) - -// A Reader implements the io.Reader, io.ReaderAt, io.Seeker, io.WriterTo, -// io.ByteScanner, and io.RuneScanner interfaces by reading -// from a string. -type Reader struct { - s string - i int64 // current reading index - prevRune int // index of previous rune; or < 0 -} - -// Len returns the number of bytes of the unread portion of the -// string. -func (r *Reader) Len() int { - if r.i >= int64(len(r.s)) { - return 0 - } - return int(int64(len(r.s)) - r.i) -} - -func (r *Reader) Read(b []byte) (n int, err error) { - if len(b) == 0 { - return 0, nil - } - if r.i >= int64(len(r.s)) { - return 0, io.EOF - } - r.prevRune = -1 - n = copy(b, r.s[r.i:]) - r.i += int64(n) - return -} - -func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) { - // cannot modify state - see io.ReaderAt - if off < 0 { - return 0, errors.New("strings.Reader.ReadAt: negative offset") - } - if off >= int64(len(r.s)) { - return 0, io.EOF - } - n = copy(b, r.s[off:]) - if n < len(b) { - err = io.EOF - } - return -} - -func (r *Reader) ReadByte() (b byte, err error) { - r.prevRune = -1 - if r.i >= int64(len(r.s)) { - return 0, io.EOF - } - b = r.s[r.i] - r.i++ - return -} - -func (r *Reader) UnreadByte() error { - r.prevRune = -1 - if r.i <= 0 { - return errors.New("strings.Reader.UnreadByte: at beginning of string") - } - r.i-- - return nil -} - -func (r *Reader) ReadRune() (ch rune, size int, err error) { - if r.i >= int64(len(r.s)) { - r.prevRune = -1 - return 0, 0, io.EOF - } - r.prevRune = int(r.i) - if c := r.s[r.i]; c < utf8.RuneSelf { - r.i++ - return rune(c), 1, nil - } - ch, size = utf8.DecodeRuneInString(r.s[r.i:]) - r.i += int64(size) - return -} - -func (r *Reader) UnreadRune() error { - if r.prevRune < 0 { - return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune") - } - r.i = int64(r.prevRune) - r.prevRune = -1 - return nil -} - -// Seek implements the io.Seeker interface. -func (r *Reader) Seek(offset int64, whence int) (int64, error) { - r.prevRune = -1 - var abs int64 - switch whence { - case 0: - abs = offset - case 1: - abs = int64(r.i) + offset - case 2: - abs = int64(len(r.s)) + offset - default: - return 0, errors.New("strings.Reader.Seek: invalid whence") - } - if abs < 0 { - return 0, errors.New("strings.Reader.Seek: negative position") - } - r.i = abs - return abs, nil -} - -// WriteTo implements the io.WriterTo interface. -func (r *Reader) WriteTo(w io.Writer) (n int64, err error) { - r.prevRune = -1 - if r.i >= int64(len(r.s)) { - return 0, nil - } - s := r.s[r.i:] - m, err := io.WriteString(w, s) - if m > len(s) { - panic("strings.Reader.WriteTo: invalid WriteString count") - } - r.i += int64(m) - n = int64(m) - if m != len(s) && err == nil { - err = io.ErrShortWrite - } - return -} - -// NewReader returns a new Reader reading from s. -// It is similar to bytes.NewBufferString but more efficient and read-only. -func NewReader(s string) *Reader { return &Reader{s, 0, -1} } diff --git a/src/pkg/strings/reader_test.go b/src/pkg/strings/reader_test.go deleted file mode 100644 index bee90eb25..000000000 --- a/src/pkg/strings/reader_test.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package strings_test - -import ( - "bytes" - "fmt" - "io" - "os" - "strings" - "sync" - "testing" -) - -func TestReader(t *testing.T) { - r := strings.NewReader("0123456789") - tests := []struct { - off int64 - seek int - n int - want string - wantpos int64 - seekerr string - }{ - {seek: os.SEEK_SET, off: 0, n: 20, want: "0123456789"}, - {seek: os.SEEK_SET, off: 1, n: 1, want: "1"}, - {seek: os.SEEK_CUR, off: 1, wantpos: 3, n: 2, want: "34"}, - {seek: os.SEEK_SET, off: -1, seekerr: "strings.Reader.Seek: negative position"}, - {seek: os.SEEK_SET, off: 1 << 33, wantpos: 1 << 33}, - {seek: os.SEEK_CUR, off: 1, wantpos: 1<<33 + 1}, - {seek: os.SEEK_SET, n: 5, want: "01234"}, - {seek: os.SEEK_CUR, n: 5, want: "56789"}, - {seek: os.SEEK_END, off: -1, n: 1, wantpos: 9, want: "9"}, - } - - for i, tt := range tests { - pos, err := r.Seek(tt.off, tt.seek) - if err == nil && tt.seekerr != "" { - t.Errorf("%d. want seek error %q", i, tt.seekerr) - continue - } - if err != nil && err.Error() != tt.seekerr { - t.Errorf("%d. seek error = %q; want %q", i, err.Error(), tt.seekerr) - continue - } - if tt.wantpos != 0 && tt.wantpos != pos { - t.Errorf("%d. pos = %d, want %d", i, pos, tt.wantpos) - } - buf := make([]byte, tt.n) - n, err := r.Read(buf) - if err != nil { - t.Errorf("%d. read = %v", i, err) - continue - } - got := string(buf[:n]) - if got != tt.want { - t.Errorf("%d. got %q; want %q", i, got, tt.want) - } - } -} - -func TestReadAfterBigSeek(t *testing.T) { - r := strings.NewReader("0123456789") - if _, err := r.Seek(1<<31+5, os.SEEK_SET); err != nil { - t.Fatal(err) - } - if n, err := r.Read(make([]byte, 10)); n != 0 || err != io.EOF { - t.Errorf("Read = %d, %v; want 0, EOF", n, err) - } -} - -func TestReaderAt(t *testing.T) { - r := strings.NewReader("0123456789") - tests := []struct { - off int64 - n int - want string - wanterr interface{} - }{ - {0, 10, "0123456789", nil}, - {1, 10, "123456789", io.EOF}, - {1, 9, "123456789", nil}, - {11, 10, "", io.EOF}, - {0, 0, "", nil}, - {-1, 0, "", "strings.Reader.ReadAt: negative offset"}, - } - for i, tt := range tests { - b := make([]byte, tt.n) - rn, err := r.ReadAt(b, tt.off) - got := string(b[:rn]) - if got != tt.want { - t.Errorf("%d. got %q; want %q", i, got, tt.want) - } - if fmt.Sprintf("%v", err) != fmt.Sprintf("%v", tt.wanterr) { - t.Errorf("%d. got error = %v; want %v", i, err, tt.wanterr) - } - } -} - -func TestReaderAtConcurrent(t *testing.T) { - // Test for the race detector, to verify ReadAt doesn't mutate - // any state. - r := strings.NewReader("0123456789") - var wg sync.WaitGroup - for i := 0; i < 5; i++ { - wg.Add(1) - go func(i int) { - defer wg.Done() - var buf [1]byte - r.ReadAt(buf[:], int64(i)) - }(i) - } - wg.Wait() -} - -func TestEmptyReaderConcurrent(t *testing.T) { - // Test for the race detector, to verify a Read that doesn't yield any bytes - // is okay to use from multiple goroutines. This was our historic behavior. 
- // See golang.org/issue/7856 - r := strings.NewReader("") - var wg sync.WaitGroup - for i := 0; i < 5; i++ { - wg.Add(2) - go func() { - defer wg.Done() - var buf [1]byte - r.Read(buf[:]) - }() - go func() { - defer wg.Done() - r.Read(nil) - }() - } - wg.Wait() -} - -func TestWriteTo(t *testing.T) { - const str = "0123456789" - for i := 0; i <= len(str); i++ { - s := str[i:] - r := strings.NewReader(s) - var b bytes.Buffer - n, err := r.WriteTo(&b) - if expect := int64(len(s)); n != expect { - t.Errorf("got %v; want %v", n, expect) - } - if err != nil { - t.Errorf("for length %d: got error = %v; want nil", len(s), err) - } - if b.String() != s { - t.Errorf("got string %q; want %q", b.String(), s) - } - if r.Len() != 0 { - t.Errorf("reader contains %v bytes; want 0", r.Len()) - } - } -} diff --git a/src/pkg/strings/replace.go b/src/pkg/strings/replace.go deleted file mode 100644 index 3e05d2057..000000000 --- a/src/pkg/strings/replace.go +++ /dev/null @@ -1,549 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings - -import "io" - -// A Replacer replaces a list of strings with replacements. -type Replacer struct { - r replacer -} - -// replacer is the interface that a replacement algorithm needs to implement. -type replacer interface { - Replace(s string) string - WriteString(w io.Writer, s string) (n int, err error) -} - -// byteBitmap represents bytes which are sought for replacement. -// byteBitmap is 256 bits wide, with a bit set for each old byte to be -// replaced. -type byteBitmap [256 / 32]uint32 - -func (m *byteBitmap) set(b byte) { - m[b>>5] |= uint32(1 << (b & 31)) -} - -// NewReplacer returns a new Replacer from a list of old, new string pairs. -// Replacements are performed in order, without overlapping matches. -func NewReplacer(oldnew ...string) *Replacer { - if len(oldnew)%2 == 1 { - panic("strings.NewReplacer: odd argument count") - } - - if len(oldnew) == 2 && len(oldnew[0]) > 1 { - return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])} - } - - allNewBytes := true - for i := 0; i < len(oldnew); i += 2 { - if len(oldnew[i]) != 1 { - return &Replacer{r: makeGenericReplacer(oldnew)} - } - if len(oldnew[i+1]) != 1 { - allNewBytes = false - } - } - - if allNewBytes { - bb := &byteReplacer{} - for i := 0; i < len(oldnew); i += 2 { - o, n := oldnew[i][0], oldnew[i+1][0] - if bb.old[o>>5]&uint32(1<<(o&31)) != 0 { - // Later old->new maps do not override previous ones with the same old string. - continue - } - bb.old.set(o) - bb.new[o] = n - } - return &Replacer{r: bb} - } - - bs := &byteStringReplacer{} - for i := 0; i < len(oldnew); i += 2 { - o, new := oldnew[i][0], oldnew[i+1] - if bs.old[o>>5]&uint32(1<<(o&31)) != 0 { - // Later old->new maps do not override previous ones with the same old string. - continue - } - bs.old.set(o) - bs.new[o] = []byte(new) - } - return &Replacer{r: bs} -} - -// Replace returns a copy of s with all replacements performed. -func (r *Replacer) Replace(s string) string { - return r.r.Replace(s) -} - -// WriteString writes s to w with all replacements performed. -func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { - return r.r.WriteString(w, s) -} - -// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys -// and values may be empty. 
For example, the trie containing keys "ax", "ay", -// "bcbc", "x" and "xy" could have eight nodes: -// -// n0 - -// n1 a- -// n2 .x+ -// n3 .y+ -// n4 b- -// n5 .cbc+ -// n6 x+ -// n7 .y+ -// -// n0 is the root node, and its children are n1, n4 and n6; n1's children are -// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked -// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 -// (marked with a trailing "+") are complete keys. -type trieNode struct { - // value is the value of the trie node's key/value pair. It is empty if - // this node is not a complete key. - value string - // priority is the priority (higher is more important) of the trie node's - // key/value pair; keys are not necessarily matched shortest- or longest- - // first. Priority is positive if this node is a complete key, and zero - // otherwise. In the example above, positive/zero priorities are marked - // with a trailing "+" or "-". - priority int - - // A trie node may have zero, one or more child nodes: - // * if the remaining fields are zero, there are no children. - // * if prefix and next are non-zero, there is one child in next. - // * if table is non-zero, it defines all the children. - // - // Prefixes are preferred over tables when there is one child, but the - // root node always uses a table for lookup efficiency. - - // prefix is the difference in keys between this trie node and the next. - // In the example above, node n4 has prefix "cbc" and n4's next node is n5. - // Node n5 has no children and so has zero prefix, next and table fields. - prefix string - next *trieNode - - // table is a lookup table indexed by the next byte in the key, after - // remapping that byte through genericReplacer.mapping to create a dense - // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and - // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and - // genericReplacer.tableSize will be 5. Node n0's table will be - // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped - // 'a', 'b' and 'x'. - table []*trieNode -} - -func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { - if key == "" { - if t.priority == 0 { - t.value = val - t.priority = priority - } - return - } - - if t.prefix != "" { - // Need to split the prefix among multiple nodes. - var n int // length of the longest common prefix - for ; n < len(t.prefix) && n < len(key); n++ { - if t.prefix[n] != key[n] { - break - } - } - if n == len(t.prefix) { - t.next.add(key[n:], val, priority, r) - } else if n == 0 { - // First byte differs, start a new lookup table here. Looking up - // what is currently t.prefix[0] will lead to prefixNode, and - // looking up key[0] will lead to keyNode. - var prefixNode *trieNode - if len(t.prefix) == 1 { - prefixNode = t.next - } else { - prefixNode = &trieNode{ - prefix: t.prefix[1:], - next: t.next, - } - } - keyNode := new(trieNode) - t.table = make([]*trieNode, r.tableSize) - t.table[r.mapping[t.prefix[0]]] = prefixNode - t.table[r.mapping[key[0]]] = keyNode - t.prefix = "" - t.next = nil - keyNode.add(key[1:], val, priority, r) - } else { - // Insert new node after the common section of the prefix. - next := &trieNode{ - prefix: t.prefix[n:], - next: t.next, - } - t.prefix = t.prefix[:n] - t.next = next - next.add(key[n:], val, priority, r) - } - } else if t.table != nil { - // Insert into existing table. 
- m := r.mapping[key[0]] - if t.table[m] == nil { - t.table[m] = new(trieNode) - } - t.table[m].add(key[1:], val, priority, r) - } else { - t.prefix = key - t.next = new(trieNode) - t.next.add("", val, priority, r) - } -} - -func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { - // Iterate down the trie to the end, and grab the value and keylen with - // the highest priority. - bestPriority := 0 - node := &r.root - n := 0 - for node != nil { - if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { - bestPriority = node.priority - val = node.value - keylen = n - found = true - } - - if s == "" { - break - } - if node.table != nil { - index := r.mapping[s[0]] - if int(index) == r.tableSize { - break - } - node = node.table[index] - s = s[1:] - n++ - } else if node.prefix != "" && HasPrefix(s, node.prefix) { - n += len(node.prefix) - s = s[len(node.prefix):] - node = node.next - } else { - break - } - } - return -} - -// genericReplacer is the fully generic algorithm. -// It's used as a fallback when nothing faster can be used. -type genericReplacer struct { - root trieNode - // tableSize is the size of a trie node's lookup table. It is the number - // of unique key bytes. - tableSize int - // mapping maps from key bytes to a dense index for trieNode.table. - mapping [256]byte -} - -func makeGenericReplacer(oldnew []string) *genericReplacer { - r := new(genericReplacer) - // Find each byte used, then assign them each an index. - for i := 0; i < len(oldnew); i += 2 { - key := oldnew[i] - for j := 0; j < len(key); j++ { - r.mapping[key[j]] = 1 - } - } - - for _, b := range r.mapping { - r.tableSize += int(b) - } - - var index byte - for i, b := range r.mapping { - if b == 0 { - r.mapping[i] = byte(r.tableSize) - } else { - r.mapping[i] = index - index++ - } - } - // Ensure root node uses a lookup table (for performance). - r.root.table = make([]*trieNode, r.tableSize) - - for i := 0; i < len(oldnew); i += 2 { - r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) - } - return r -} - -type appendSliceWriter []byte - -// Write writes to the buffer to satisfy io.Writer. -func (w *appendSliceWriter) Write(p []byte) (int, error) { - *w = append(*w, p...) - return len(p), nil -} - -// WriteString writes to the buffer without string->[]byte->string allocations. -func (w *appendSliceWriter) WriteString(s string) (int, error) { - *w = append(*w, s...) - return len(s), nil -} - -type stringWriterIface interface { - WriteString(string) (int, error) -} - -type stringWriter struct { - w io.Writer -} - -func (w stringWriter) WriteString(s string) (int, error) { - return w.w.Write([]byte(s)) -} - -func getStringWriter(w io.Writer) stringWriterIface { - sw, ok := w.(stringWriterIface) - if !ok { - sw = stringWriter{w} - } - return sw -} - -func (r *genericReplacer) Replace(s string) string { - buf := make(appendSliceWriter, 0, len(s)) - r.WriteString(&buf, s) - return string(buf) -} - -func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { - sw := getStringWriter(w) - var last, wn int - var prevMatchEmpty bool - for i := 0; i <= len(s); { - // Ignore the empty match iff the previous loop found the empty match. 
- val, keylen, match := r.lookup(s[i:], prevMatchEmpty) - prevMatchEmpty = match && keylen == 0 - if match { - wn, err = sw.WriteString(s[last:i]) - n += wn - if err != nil { - return - } - wn, err = sw.WriteString(val) - n += wn - if err != nil { - return - } - i += keylen - last = i - continue - } - i++ - } - if last != len(s) { - wn, err = sw.WriteString(s[last:]) - n += wn - } - return -} - -// singleStringReplacer is the implementation that's used when there is only -// one string to replace (and that string has more than one byte). -type singleStringReplacer struct { - finder *stringFinder - // value is the new string that replaces that pattern when it's found. - value string -} - -func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { - return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} -} - -func (r *singleStringReplacer) Replace(s string) string { - var buf []byte - i, matched := 0, false - for { - match := r.finder.next(s[i:]) - if match == -1 { - break - } - matched = true - buf = append(buf, s[i:i+match]...) - buf = append(buf, r.value...) - i += match + len(r.finder.pattern) - } - if !matched { - return s - } - buf = append(buf, s[i:]...) - return string(buf) -} - -func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { - sw := getStringWriter(w) - var i, wn int - for { - match := r.finder.next(s[i:]) - if match == -1 { - break - } - wn, err = sw.WriteString(s[i : i+match]) - n += wn - if err != nil { - return - } - wn, err = sw.WriteString(r.value) - n += wn - if err != nil { - return - } - i += match + len(r.finder.pattern) - } - wn, err = sw.WriteString(s[i:]) - n += wn - return -} - -// byteReplacer is the implementation that's used when all the "old" -// and "new" values are single ASCII bytes. -type byteReplacer struct { - // old has a bit set for each old byte that should be replaced. - old byteBitmap - - // replacement byte, indexed by old byte. only valid if - // corresponding old bit is set. - new [256]byte -} - -func (r *byteReplacer) Replace(s string) string { - var buf []byte // lazily allocated - for i := 0; i < len(s); i++ { - b := s[i] - if r.old[b>>5]&uint32(1<<(b&31)) != 0 { - if buf == nil { - buf = []byte(s) - } - buf[i] = r.new[b] - } - } - if buf == nil { - return s - } - return string(buf) -} - -func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { - // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. - bufsize := 32 << 10 - if len(s) < bufsize { - bufsize = len(s) - } - buf := make([]byte, bufsize) - - for len(s) > 0 { - ncopy := copy(buf, s[:]) - s = s[ncopy:] - for i, b := range buf[:ncopy] { - if r.old[b>>5]&uint32(1<<(b&31)) != 0 { - buf[i] = r.new[b] - } - } - wn, err := w.Write(buf[:ncopy]) - n += wn - if err != nil { - return n, err - } - } - return n, nil -} - -// byteStringReplacer is the implementation that's used when all the -// "old" values are single ASCII bytes but the "new" values vary in -// size. -type byteStringReplacer struct { - // old has a bit set for each old byte that should be replaced. - old byteBitmap - - // replacement string, indexed by old byte. only valid if - // corresponding old bit is set. 
- new [256][]byte -} - -func (r *byteStringReplacer) Replace(s string) string { - newSize := 0 - anyChanges := false - for i := 0; i < len(s); i++ { - b := s[i] - if r.old[b>>5]&uint32(1<<(b&31)) != 0 { - anyChanges = true - newSize += len(r.new[b]) - } else { - newSize++ - } - } - if !anyChanges { - return s - } - buf := make([]byte, newSize) - bi := buf - for i := 0; i < len(s); i++ { - b := s[i] - if r.old[b>>5]&uint32(1<<(b&31)) != 0 { - n := copy(bi, r.new[b]) - bi = bi[n:] - } else { - bi[0] = b - bi = bi[1:] - } - } - return string(buf) -} - -// WriteString maintains one buffer that's at most 32KB. The bytes in -// s are enumerated and the buffer is filled. If it reaches its -// capacity or a byte has a replacement, the buffer is flushed to w. -func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { - // TODO(bradfitz): use io.WriteString with slices of s instead. - bufsize := 32 << 10 - if len(s) < bufsize { - bufsize = len(s) - } - buf := make([]byte, bufsize) - bi := buf[:0] - - for i := 0; i < len(s); i++ { - b := s[i] - var new []byte - if r.old[b>>5]&uint32(1<<(b&31)) != 0 { - new = r.new[b] - } else { - bi = append(bi, b) - } - if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { - nw, err := w.Write(bi) - n += nw - if err != nil { - return n, err - } - bi = buf[:0] - } - if len(new) > 0 { - nw, err := w.Write(new) - n += nw - if err != nil { - return n, err - } - } - } - if len(bi) > 0 { - nw, err := w.Write(bi) - n += nw - if err != nil { - return n, err - } - } - return n, nil -} diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go deleted file mode 100644 index 82e4b6ef0..000000000 --- a/src/pkg/strings/replace_test.go +++ /dev/null @@ -1,506 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings_test - -import ( - "bytes" - "fmt" - . "strings" - "testing" -) - -var htmlEscaper = NewReplacer( - "&", "&", - "<", "<", - ">", ">", - `"`, """, - "'", "'", -) - -var htmlUnescaper = NewReplacer( - "&", "&", - "<", "<", - ">", ">", - """, `"`, - "'", "'", -) - -// The http package's old HTML escaping function. -func oldHTMLEscape(s string) string { - s = Replace(s, "&", "&", -1) - s = Replace(s, "<", "<", -1) - s = Replace(s, ">", ">", -1) - s = Replace(s, `"`, """, -1) - s = Replace(s, "'", "'", -1) - return s -} - -var capitalLetters = NewReplacer("a", "A", "b", "B") - -// TestReplacer tests the replacer implementations. -func TestReplacer(t *testing.T) { - type testCase struct { - r *Replacer - in, out string - } - var testCases []testCase - - // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. - str := func(b byte) string { - return string([]byte{b}) - } - var s []string - - // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". - s = nil - for i := 0; i < 256; i++ { - s = append(s, str(byte(i)), str(byte(i+1))) - } - inc := NewReplacer(s...) - - // Test cases with 1-byte old strings, 1-byte new strings. - testCases = append(testCases, - testCase{capitalLetters, "brad", "BrAd"}, - testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, - testCase{capitalLetters, "", ""}, - - testCase{inc, "brad", "csbe"}, - testCase{inc, "\x00\xff", "\x01\x00"}, - testCase{inc, "", ""}, - - testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, - ) - - // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... 
- s = nil - for i := 0; i < 256; i++ { - n := i + 1 - 'a' - if n < 1 { - n = 1 - } - s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) - } - repeat := NewReplacer(s...) - - // Test cases with 1-byte old strings, variable length new strings. - testCases = append(testCases, - testCase{htmlEscaper, "No changes", "No changes"}, - testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, - testCase{htmlEscaper, "&&&", "&&&"}, - testCase{htmlEscaper, "", ""}, - - testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, - testCase{repeat, "abba", "abbbba"}, - testCase{repeat, "", ""}, - - testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, - ) - - // The remaining test cases have variable length old strings. - - testCases = append(testCases, - testCase{htmlUnescaper, "&amp;", "&"}, - testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, - testCase{htmlUnescaper, "", ""}, - - testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, - - testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, - - testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, - ) - - // gen1 has multiple old strings of variable length. There is no - // overall non-empty common prefix, but some pairwise common prefixes. - gen1 := NewReplacer( - "aaa", "3[aaa]", - "aa", "2[aa]", - "a", "1[a]", - "i", "i", - "longerst", "most long", - "longer", "medium", - "long", "short", - "xx", "xx", - "x", "X", - "X", "Y", - "Y", "Z", - ) - testCases = append(testCases, - testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, - testCase{gen1, "long, longerst, longer", "short, most long, medium"}, - testCase{gen1, "xxxxx", "xxxxX"}, - testCase{gen1, "XiX", "YiY"}, - testCase{gen1, "", ""}, - ) - - // gen2 has multiple old strings with no pairwise common prefix. - gen2 := NewReplacer( - "roses", "red", - "violets", "blue", - "sugar", "sweet", - ) - testCases = append(testCases, - testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, - testCase{gen2, "", ""}, - ) - - // gen3 has multiple old strings with an overall common prefix. - gen3 := NewReplacer( - "abracadabra", "poof", - "abracadabrakazam", "splat", - "abraham", "lincoln", - "abrasion", "scrape", - "abraham", "isaac", - ) - testCases = append(testCases, - testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, - testCase{gen3, "abrasion abracad", "scrape abracad"}, - testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, - testCase{gen3, "", ""}, - ) - - // foo{1,2,3,4} have multiple old strings with an overall common prefix - // and 1- or 2- byte extensions from the common prefix. - foo1 := NewReplacer( - "foo1", "A", - "foo2", "B", - "foo3", "C", - ) - foo2 := NewReplacer( - "foo1", "A", - "foo2", "B", - "foo31", "C", - "foo32", "D", - ) - foo3 := NewReplacer( - "foo11", "A", - "foo12", "B", - "foo31", "C", - "foo32", "D", - ) - foo4 := NewReplacer( - "foo12", "B", - "foo32", "D", - ) - testCases = append(testCases, - testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, - testCase{foo1, "", ""}, - - testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, - testCase{foo2, "", ""}, - - testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, - testCase{foo3, "", ""}, - - testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, - testCase{foo4, "", ""}, - ) - - // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. 
- allBytes := make([]byte, 256) - for i := range allBytes { - allBytes[i] = byte(i) - } - allString := string(allBytes) - genAll := NewReplacer( - allString, "[all]", - "\xff", "[ff]", - "\x00", "[00]", - ) - testCases = append(testCases, - testCase{genAll, allString, "[all]"}, - testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, - testCase{genAll, "", ""}, - ) - - // Test cases with empty old strings. - - blankToX1 := NewReplacer("", "X") - blankToX2 := NewReplacer("", "X", "", "") - blankHighPriority := NewReplacer("", "X", "o", "O") - blankLowPriority := NewReplacer("o", "O", "", "X") - blankNoOp1 := NewReplacer("", "") - blankNoOp2 := NewReplacer("", "", "", "A") - blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") - testCases = append(testCases, - testCase{blankToX1, "foo", "XfXoXoX"}, - testCase{blankToX1, "", "X"}, - - testCase{blankToX2, "foo", "XfXoXoX"}, - testCase{blankToX2, "", "X"}, - - testCase{blankHighPriority, "oo", "XOXOX"}, - testCase{blankHighPriority, "ii", "XiXiX"}, - testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, - testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, - testCase{blankHighPriority, "", "X"}, - - testCase{blankLowPriority, "oo", "OOX"}, - testCase{blankLowPriority, "ii", "XiXiX"}, - testCase{blankLowPriority, "oiio", "OXiXiOX"}, - testCase{blankLowPriority, "iooi", "XiOOXiX"}, - testCase{blankLowPriority, "", "X"}, - - testCase{blankNoOp1, "foo", "foo"}, - testCase{blankNoOp1, "", ""}, - - testCase{blankNoOp2, "foo", "foo"}, - testCase{blankNoOp2, "", ""}, - - testCase{blankFoo, "foobarfoobaz", "XRXZX"}, - testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, - testCase{blankFoo, "", "X"}, - ) - - // single string replacer - - abcMatcher := NewReplacer("abc", "[match]") - - testCases = append(testCases, - testCase{abcMatcher, "", ""}, - testCase{abcMatcher, "ab", "ab"}, - testCase{abcMatcher, "abc", "[match]"}, - testCase{abcMatcher, "abcd", "[match]d"}, - testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, - ) - - // Issue 6659 cases (more single string replacer) - - noHello := NewReplacer("Hello", "") - testCases = append(testCases, - testCase{noHello, "Hello", ""}, - testCase{noHello, "Hellox", "x"}, - testCase{noHello, "xHello", "x"}, - testCase{noHello, "xHellox", "xx"}, - ) - - // No-arg test cases. - - nop := NewReplacer() - testCases = append(testCases, - testCase{nop, "abc", "abc"}, - testCase{nop, "", ""}, - ) - - // Run the test cases. - - for i, tc := range testCases { - if s := tc.r.Replace(tc.in); s != tc.out { - t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) - } - var buf bytes.Buffer - n, err := tc.r.WriteString(&buf, tc.in) - if err != nil { - t.Errorf("%d. WriteString: %v", i, err) - continue - } - got := buf.String() - if got != tc.out { - t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) - continue - } - if n != len(tc.out) { - t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", - i, tc.in, n, len(tc.out), tc.out) - } - } -} - -// TestPickAlgorithm tests that NewReplacer picks the correct algorithm. 
-func TestPickAlgorithm(t *testing.T) { - testCases := []struct { - r *Replacer - want string - }{ - {capitalLetters, "*strings.byteReplacer"}, - {htmlEscaper, "*strings.byteStringReplacer"}, - {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, - {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, - {NewReplacer("", "X"), "*strings.genericReplacer"}, - {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, - } - for i, tc := range testCases { - got := fmt.Sprintf("%T", tc.r.Replacer()) - if got != tc.want { - t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) - } - } -} - -// TestGenericTrieBuilding verifies the structure of the generated trie. There -// is one node per line, and the key ending with the current line is in the -// trie if it ends with a "+". -func TestGenericTrieBuilding(t *testing.T) { - testCases := []struct{ in, out string }{ - {"abc;abdef;abdefgh;xx;xy;z", `- - a- - .b- - ..c+ - ..d- - ...ef+ - .....gh+ - x- - .x+ - .y+ - z+ - `}, - {"abracadabra;abracadabrakazam;abraham;abrasion", `- - a- - .bra- - ....c- - .....adabra+ - ...........kazam+ - ....h- - .....am+ - ....s- - .....ion+ - `}, - {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- - X+ - Y+ - a+ - .a+ - ..a+ - i+ - l- - .ong+ - ....er+ - ......st+ - x+ - .x+ - `}, - {"foo;;foo;foo1", `+ - f- - .oo+ - ...1+ - `}, - } - - for _, tc := range testCases { - keys := Split(tc.in, ";") - args := make([]string, len(keys)*2) - for i, key := range keys { - args[i*2] = key - } - - got := NewReplacer(args...).PrintTrie() - // Remove tabs from tc.out - wantbuf := make([]byte, 0, len(tc.out)) - for i := 0; i < len(tc.out); i++ { - if tc.out[i] != '\t' { - wantbuf = append(wantbuf, tc.out[i]) - } - } - want := string(wantbuf) - - if got != want { - t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) - } - } -} - -func BenchmarkGenericNoMatch(b *testing.B) { - str := Repeat("A", 100) + Repeat("B", 100) - generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic - for i := 0; i < b.N; i++ { - generic.Replace(str) - } -} - -func BenchmarkGenericMatch1(b *testing.B) { - str := Repeat("a", 100) + Repeat("b", 100) - generic := NewReplacer("a", "A", "b", "B", "12", "123") - for i := 0; i < b.N; i++ { - generic.Replace(str) - } -} - -func BenchmarkGenericMatch2(b *testing.B) { - str := Repeat("It's <b>HTML</b>!", 100) - for i := 0; i < b.N; i++ { - htmlUnescaper.Replace(str) - } -} - -func benchmarkSingleString(b *testing.B, pattern, text string) { - r := NewReplacer(pattern, "[match]") - b.SetBytes(int64(len(text))) - b.ResetTimer() - for i := 0; i < b.N; i++ { - r.Replace(text) - } -} - -func BenchmarkSingleMaxSkipping(b *testing.B) { - benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) -} - -func BenchmarkSingleLongSuffixFail(b *testing.B) { - benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) -} - -func BenchmarkSingleMatch(b *testing.B) { - benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000)) -} - -func BenchmarkByteByteNoMatch(b *testing.B) { - str := Repeat("A", 100) + Repeat("B", 100) - for i := 0; i < b.N; i++ { - capitalLetters.Replace(str) - } -} - -func BenchmarkByteByteMatch(b *testing.B) { - str := Repeat("a", 100) + Repeat("b", 100) - for i := 0; i < b.N; i++ { - capitalLetters.Replace(str) - } -} - -func BenchmarkByteStringMatch(b *testing.B) { - str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" - for i := 0; i < b.N; i++ { - htmlEscaper.Replace(str) - } -} - -func BenchmarkHTMLEscapeNew(b 
*testing.B) { - str := "I <3 to escape HTML & other text too." - for i := 0; i < b.N; i++ { - htmlEscaper.Replace(str) - } -} - -func BenchmarkHTMLEscapeOld(b *testing.B) { - str := "I <3 to escape HTML & other text too." - for i := 0; i < b.N; i++ { - oldHTMLEscape(str) - } -} - -// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. -func BenchmarkByteByteReplaces(b *testing.B) { - str := Repeat("a", 100) + Repeat("b", 100) - for i := 0; i < b.N; i++ { - Replace(Replace(str, "a", "A", -1), "b", "B", -1) - } -} - -// BenchmarkByteByteMap compares byteByteImpl against Map. -func BenchmarkByteByteMap(b *testing.B) { - str := Repeat("a", 100) + Repeat("b", 100) - fn := func(r rune) rune { - switch r { - case 'a': - return 'A' - case 'b': - return 'B' - } - return r - } - for i := 0; i < b.N; i++ { - Map(fn, str) - } -} diff --git a/src/pkg/strings/search.go b/src/pkg/strings/search.go deleted file mode 100644 index f77c879c5..000000000 --- a/src/pkg/strings/search.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings - -// stringFinder efficiently finds strings in a source text. It's implemented -// using the Boyer-Moore string search algorithm: -// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm -// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged -// document uses 1-based indexing) -type stringFinder struct { - // pattern is the string that we are searching for in the text. - pattern string - - // badCharSkip[b] contains the distance between the last byte of pattern - // and the rightmost occurrence of b in pattern. If b is not in pattern, - // badCharSkip[b] is len(pattern). - // - // Whenever a mismatch is found with byte b in the text, we can safely - // shift the matching frame at least badCharSkip[b] until the next time - // the matching char could be in alignment. - badCharSkip [256]int - - // goodSuffixSkip[i] defines how far we can shift the matching frame given - // that the suffix pattern[i+1:] matches, but the byte pattern[i] does - // not. There are two cases to consider: - // - // 1. The matched suffix occurs elsewhere in pattern (with a different - // byte preceding it that we might possibly match). In this case, we can - // shift the matching frame to align with the next suffix chunk. For - // example, the pattern "mississi" has the suffix "issi" next occurring - // (in right-to-left order) at index 1, so goodSuffixSkip[3] == - // shift+len(suffix) == 3+4 == 7. - // - // 2. If the matched suffix does not occur elsewhere in pattern, then the - // matching frame may share part of its prefix with the end of the - // matching suffix. In this case, goodSuffixSkip[i] will contain how far - // to shift the frame to align this portion of the prefix to the - // suffix. For example, in the pattern "abcxxxabc", when the first - // mismatch from the back is found to be in position 3, the matching - // suffix "xxabc" is not found elsewhere in the pattern. However, its - // rightmost "abc" (at position 6) is a prefix of the whole pattern, so - // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11. - goodSuffixSkip []int -} - -func makeStringFinder(pattern string) *stringFinder { - f := &stringFinder{ - pattern: pattern, - goodSuffixSkip: make([]int, len(pattern)), - } - // last is the index of the last character in the pattern. 
- last := len(pattern) - 1 - - // Build bad character table. - // Bytes not in the pattern can skip one pattern's length. - for i := range f.badCharSkip { - f.badCharSkip[i] = len(pattern) - } - // The loop condition is < instead of <= so that the last byte does not - // have a zero distance to itself. Finding this byte out of place implies - // that it is not in the last position. - for i := 0; i < last; i++ { - f.badCharSkip[pattern[i]] = last - i - } - - // Build good suffix table. - // First pass: set each value to the next index which starts a prefix of - // pattern. - lastPrefix := last - for i := last; i >= 0; i-- { - if HasPrefix(pattern, pattern[i+1:]) { - lastPrefix = i + 1 - } - // lastPrefix is the shift, and (last-i) is len(suffix). - f.goodSuffixSkip[i] = lastPrefix + last - i - } - // Second pass: find repeats of pattern's suffix starting from the front. - for i := 0; i < last; i++ { - lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1]) - if pattern[i-lenSuffix] != pattern[last-lenSuffix] { - // (last-i) is the shift, and lenSuffix is len(suffix). - f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i - } - } - - return f -} - -func longestCommonSuffix(a, b string) (i int) { - for ; i < len(a) && i < len(b); i++ { - if a[len(a)-1-i] != b[len(b)-1-i] { - break - } - } - return -} - -// next returns the index in text of the first occurrence of the pattern. If -// the pattern is not found, it returns -1. -func (f *stringFinder) next(text string) int { - i := len(f.pattern) - 1 - for i < len(text) { - // Compare backwards from the end until the first unmatching character. - j := len(f.pattern) - 1 - for j >= 0 && text[i] == f.pattern[j] { - i-- - j-- - } - if j < 0 { - return i + 1 // match - } - i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j]) - } - return -1 -} - -func max(a, b int) int { - if a > b { - return a - } - return b -} diff --git a/src/pkg/strings/search_test.go b/src/pkg/strings/search_test.go deleted file mode 100644 index 966c05e65..000000000 --- a/src/pkg/strings/search_test.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings_test - -import ( - "reflect" - . "strings" - "testing" -) - -func TestFinderNext(t *testing.T) { - testCases := []struct { - pat, text string - index int - }{ - {"", "", 0}, - {"", "abc", 0}, - {"abc", "", -1}, - {"abc", "abc", 0}, - {"d", "abcdefg", 3}, - {"nan", "banana", 2}, - {"pan", "anpanman", 2}, - {"nnaaman", "anpanmanam", -1}, - {"abcd", "abc", -1}, - {"abcd", "bcd", -1}, - {"bcd", "abcd", 1}, - {"abc", "acca", -1}, - {"aa", "aaa", 0}, - {"baa", "aaaaa", -1}, - {"at that", "which finally halts. 
at that point", 22}, - } - - for _, tc := range testCases { - got := StringFind(tc.pat, tc.text) - want := tc.index - if got != want { - t.Errorf("stringFind(%q, %q) got %d, want %d\n", tc.pat, tc.text, got, want) - } - } -} - -func TestFinderCreation(t *testing.T) { - testCases := []struct { - pattern string - bad [256]int - suf []int - }{ - { - "abc", - [256]int{'a': 2, 'b': 1, 'c': 3}, - []int{5, 4, 1}, - }, - { - "mississi", - [256]int{'i': 3, 'm': 7, 's': 1}, - []int{15, 14, 13, 7, 11, 10, 7, 1}, - }, - // From http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf - { - "abcxxxabc", - [256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3}, - []int{14, 13, 12, 11, 10, 9, 11, 10, 1}, - }, - { - "abyxcdeyx", - [256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5}, - []int{17, 16, 15, 14, 13, 12, 7, 10, 1}, - }, - } - - for _, tc := range testCases { - bad, good := DumpTables(tc.pattern) - - for i, got := range bad { - want := tc.bad[i] - if want == 0 { - want = len(tc.pattern) - } - if got != want { - t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", tc.pattern, i, got, want) - } - } - - if !reflect.DeepEqual(good, tc.suf) { - t.Errorf("boyerMoore(%q) got %v want %v", tc.pattern, good, tc.suf) - } - } -} diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go deleted file mode 100644 index 5d46211d8..000000000 --- a/src/pkg/strings/strings.go +++ /dev/null @@ -1,725 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package strings implements simple functions to manipulate strings. -package strings - -import ( - "unicode" - "unicode/utf8" -) - -// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit). -// Invalid UTF-8 sequences become correct encodings of U+FFF8. -func explode(s string, n int) []string { - if n == 0 { - return nil - } - l := utf8.RuneCountInString(s) - if n <= 0 || n > l { - n = l - } - a := make([]string, n) - var size int - var ch rune - i, cur := 0, 0 - for ; i+1 < n; i++ { - ch, size = utf8.DecodeRuneInString(s[cur:]) - if ch == utf8.RuneError { - a[i] = string(utf8.RuneError) - } else { - a[i] = s[cur : cur+size] - } - cur += size - } - // add the rest, if there is any - if cur < len(s) { - a[i] = s[cur:] - } - return a -} - -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashstr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashstr(sep string) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// Count counts the number of non-overlapping instances of sep in s. 
-func Count(s, sep string) int { - n := 0 - // special cases - switch { - case len(sep) == 0: - return utf8.RuneCountInString(s) + 1 - case len(sep) == 1: - // special case worth making fast - c := sep[0] - for i := 0; i < len(s); i++ { - if s[i] == c { - n++ - } - } - return n - case len(sep) > len(s): - return 0 - case len(sep) == len(s): - if sep == s { - return 1 - } - return 0 - } - hashsep, pow := hashstr(sep) - h := uint32(0) - for i := 0; i < len(sep); i++ { - h = h*primeRK + uint32(s[i]) - } - lastmatch := 0 - if h == hashsep && s[:len(sep)] == sep { - n++ - lastmatch = len(sep) - } - for i := len(sep); i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-len(sep)]) - i++ - if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { - n++ - lastmatch = i - } - } - return n -} - -// Contains returns true if substr is within s. -func Contains(s, substr string) bool { - return Index(s, substr) >= 0 -} - -// ContainsAny returns true if any Unicode code points in chars are within s. -func ContainsAny(s, chars string) bool { - return IndexAny(s, chars) >= 0 -} - -// ContainsRune returns true if the Unicode code point r is within s. -func ContainsRune(s string, r rune) bool { - return IndexRune(s, r) >= 0 -} - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep string) int { - n := len(sep) - switch { - case n == 0: - return 0 - case n == 1: - return IndexByte(s, sep[0]) - case n == len(s): - if sep == s { - return 0 - } - return -1 - case n > len(s): - return -1 - } - // Hash sep. - hashsep, pow := hashstr(sep) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashsep && s[:n] == sep { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashsep && s[i-n:i] == sep { - return i - n - } - } - return -1 -} - -// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. -func LastIndex(s, sep string) int { - n := len(sep) - if n == 0 { - return len(s) - } - c := sep[0] - if n == 1 { - // special case worth making fast - for i := len(s) - 1; i >= 0; i-- { - if s[i] == c { - return i - } - } - return -1 - } - // n > 1 - for i := len(s) - n; i >= 0; i-- { - if s[i] == c && s[i:i+n] == sep { - return i - } - } - return -1 -} - -// IndexRune returns the index of the first instance of the Unicode code point -// r, or -1 if rune is not present in s. -func IndexRune(s string, r rune) int { - switch { - case r < 0x80: - b := byte(r) - for i := 0; i < len(s); i++ { - if s[i] == b { - return i - } - } - default: - for i, c := range s { - if c == r { - return i - } - } - } - return -1 -} - -// IndexAny returns the index of the first instance of any Unicode code point -// from chars in s, or -1 if no Unicode code point from chars is present in s. -func IndexAny(s, chars string) int { - if len(chars) > 0 { - for i, c := range s { - for _, m := range chars { - if c == m { - return i - } - } - } - } - return -1 -} - -// LastIndexAny returns the index of the last instance of any Unicode code -// point from chars in s, or -1 if no Unicode code point from chars is -// present in s. 
-func LastIndexAny(s, chars string) int { - if len(chars) > 0 { - for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRuneInString(s[0:i]) - i -= size - for _, m := range chars { - if rune == m { - return i - } - } - } - } - return -1 -} - -// Generic split: splits after each instance of sep, -// including sepSave bytes of sep in the subarrays. -func genSplit(s, sep string, sepSave, n int) []string { - if n == 0 { - return nil - } - if sep == "" { - return explode(s, n) - } - if n < 0 { - n = Count(s, sep) + 1 - } - c := sep[0] - start := 0 - a := make([]string, n) - na := 0 - for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { - if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { - a[na] = s[start : i+sepSave] - na++ - start = i + len(sep) - i += len(sep) - 1 - } - } - a[na] = s[start:] - return a[0 : na+1] -} - -// SplitN slices s into substrings separated by sep and returns a slice of -// the substrings between those separators. -// If sep is empty, SplitN splits after each UTF-8 sequence. -// The count determines the number of substrings to return: -// n > 0: at most n substrings; the last substring will be the unsplit remainder. -// n == 0: the result is nil (zero substrings) -// n < 0: all substrings -func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } - -// SplitAfterN slices s into substrings after each instance of sep and -// returns a slice of those substrings. -// If sep is empty, SplitAfterN splits after each UTF-8 sequence. -// The count determines the number of substrings to return: -// n > 0: at most n substrings; the last substring will be the unsplit remainder. -// n == 0: the result is nil (zero substrings) -// n < 0: all substrings -func SplitAfterN(s, sep string, n int) []string { - return genSplit(s, sep, len(sep), n) -} - -// Split slices s into all substrings separated by sep and returns a slice of -// the substrings between those separators. -// If sep is empty, Split splits after each UTF-8 sequence. -// It is equivalent to SplitN with a count of -1. -func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } - -// SplitAfter slices s into all substrings after each instance of sep and -// returns a slice of those substrings. -// If sep is empty, SplitAfter splits after each UTF-8 sequence. -// It is equivalent to SplitAfterN with a count of -1. -func SplitAfter(s, sep string) []string { - return genSplit(s, sep, len(sep), -1) -} - -// Fields splits the string s around each instance of one or more consecutive white space -// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an -// empty list if s contains only white space. -func Fields(s string) []string { - return FieldsFunc(s, unicode.IsSpace) -} - -// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) -// and returns an array of slices of s. If all code points in s satisfy f(c) or the -// string is empty, an empty slice is returned. -func FieldsFunc(s string, f func(rune) bool) []string { - // First count the fields. - n := 0 - inField := false - for _, rune := range s { - wasInField := inField - inField = !f(rune) - if inField && !wasInField { - n++ - } - } - - // Now create them. - a := make([]string, n) - na := 0 - fieldStart := -1 // Set to -1 when looking for start of field. 
- for i, rune := range s { - if f(rune) { - if fieldStart >= 0 { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 - } - } else if fieldStart == -1 { - fieldStart = i - } - } - if fieldStart >= 0 { // Last field might end at EOF. - a[na] = s[fieldStart:] - } - return a -} - -// Join concatenates the elements of a to create a single string. The separator string -// sep is placed between elements in the resulting string. -func Join(a []string, sep string) string { - if len(a) == 0 { - return "" - } - if len(a) == 1 { - return a[0] - } - n := len(sep) * (len(a) - 1) - for i := 0; i < len(a); i++ { - n += len(a[i]) - } - - b := make([]byte, n) - bp := copy(b, a[0]) - for _, s := range a[1:] { - bp += copy(b[bp:], sep) - bp += copy(b[bp:], s) - } - return string(b) -} - -// HasPrefix tests whether the string s begins with prefix. -func HasPrefix(s, prefix string) bool { - return len(s) >= len(prefix) && s[0:len(prefix)] == prefix -} - -// HasSuffix tests whether the string s ends with suffix. -func HasSuffix(s, suffix string) bool { - return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix -} - -// Map returns a copy of the string s with all its characters modified -// according to the mapping function. If mapping returns a negative value, the character is -// dropped from the string with no replacement. -func Map(mapping func(rune) rune, s string) string { - // In the worst case, the string can grow when mapped, making - // things unpleasant. But it's so rare we barge in assuming it's - // fine. It could also shrink but that falls out naturally. - maxbytes := len(s) // length of b - nbytes := 0 // number of bytes encoded in b - // The output buffer b is initialized on demand, the first - // time a character differs. - var b []byte - - for i, c := range s { - r := mapping(c) - if b == nil { - if r == c { - continue - } - b = make([]byte, maxbytes) - nbytes = copy(b, s[:i]) - } - if r >= 0 { - wid := 1 - if r >= utf8.RuneSelf { - wid = utf8.RuneLen(r) - } - if nbytes+wid > maxbytes { - // Grow the buffer. - maxbytes = maxbytes*2 + utf8.UTFMax - nb := make([]byte, maxbytes) - copy(nb, b[0:nbytes]) - b = nb - } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) - } - } - if b == nil { - return s - } - return string(b[0:nbytes]) -} - -// Repeat returns a new string consisting of count copies of the string s. -func Repeat(s string, count int) string { - b := make([]byte, len(s)*count) - bp := 0 - for i := 0; i < count; i++ { - bp += copy(b[bp:], s) - } - return string(b) -} - -// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. -func ToUpper(s string) string { return Map(unicode.ToUpper, s) } - -// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. -func ToLower(s string) string { return Map(unicode.ToLower, s) } - -// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. -func ToTitle(s string) string { return Map(unicode.ToTitle, s) } - -// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their -// upper case, giving priority to the special casing rules. -func ToUpperSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToUpper(r) }, s) -} - -// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their -// lower case, giving priority to the special casing rules. 
-func ToLowerSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToLower(r) }, s) -} - -// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their -// title case, giving priority to the special casing rules. -func ToTitleSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToTitle(r) }, s) -} - -// isSeparator reports whether the rune could mark a word boundary. -// TODO: update when package unicode captures more of the properties. -func isSeparator(r rune) bool { - // ASCII alphanumerics and underscore are not separators - if r <= 0x7F { - switch { - case '0' <= r && r <= '9': - return false - case 'a' <= r && r <= 'z': - return false - case 'A' <= r && r <= 'Z': - return false - case r == '_': - return false - } - return true - } - // Letters and digits are not separators - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return false - } - // Otherwise, all we can do for now is treat spaces as separators. - return unicode.IsSpace(r) -} - -// Title returns a copy of the string s with all Unicode letters that begin words -// mapped to their title case. -// -// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly. -func Title(s string) string { - // Use a closure here to remember state. - // Hackish but effective. Depends on Map scanning in order and calling - // the closure once per rune. - prev := ' ' - return Map( - func(r rune) rune { - if isSeparator(prev) { - prev = r - return unicode.ToTitle(r) - } - prev = r - return r - }, - s) -} - -// TrimLeftFunc returns a slice of the string s with all leading -// Unicode code points c satisfying f(c) removed. -func TrimLeftFunc(s string, f func(rune) bool) string { - i := indexFunc(s, f, false) - if i == -1 { - return "" - } - return s[i:] -} - -// TrimRightFunc returns a slice of the string s with all trailing -// Unicode code points c satisfying f(c) removed. -func TrimRightFunc(s string, f func(rune) bool) string { - i := lastIndexFunc(s, f, false) - if i >= 0 && s[i] >= utf8.RuneSelf { - _, wid := utf8.DecodeRuneInString(s[i:]) - i += wid - } else { - i++ - } - return s[0:i] -} - -// TrimFunc returns a slice of the string s with all leading -// and trailing Unicode code points c satisfying f(c) removed. -func TrimFunc(s string, f func(rune) bool) string { - return TrimRightFunc(TrimLeftFunc(s, f), f) -} - -// IndexFunc returns the index into s of the first Unicode -// code point satisfying f(c), or -1 if none do. -func IndexFunc(s string, f func(rune) bool) int { - return indexFunc(s, f, true) -} - -// LastIndexFunc returns the index into s of the last -// Unicode code point satisfying f(c), or -1 if none do. -func LastIndexFunc(s string, f func(rune) bool) int { - return lastIndexFunc(s, f, true) -} - -// indexFunc is the same as IndexFunc except that if -// truth==false, the sense of the predicate function is -// inverted. -func indexFunc(s string, f func(rune) bool, truth bool) int { - start := 0 - for start < len(s) { - wid := 1 - r := rune(s[start]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRuneInString(s[start:]) - } - if f(r) == truth { - return start - } - start += wid - } - return -1 -} - -// lastIndexFunc is the same as LastIndexFunc except that if -// truth==false, the sense of the predicate function is -// inverted. 
-func lastIndexFunc(s string, f func(rune) bool, truth bool) int { - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRuneInString(s[0:i]) - i -= size - if f(r) == truth { - return i - } - } - return -1 -} - -func makeCutsetFunc(cutset string) func(rune) bool { - return func(r rune) bool { return IndexRune(cutset, r) >= 0 } -} - -// Trim returns a slice of the string s with all leading and -// trailing Unicode code points contained in cutset removed. -func Trim(s string, cutset string) string { - if s == "" || cutset == "" { - return s - } - return TrimFunc(s, makeCutsetFunc(cutset)) -} - -// TrimLeft returns a slice of the string s with all leading -// Unicode code points contained in cutset removed. -func TrimLeft(s string, cutset string) string { - if s == "" || cutset == "" { - return s - } - return TrimLeftFunc(s, makeCutsetFunc(cutset)) -} - -// TrimRight returns a slice of the string s, with all trailing -// Unicode code points contained in cutset removed. -func TrimRight(s string, cutset string) string { - if s == "" || cutset == "" { - return s - } - return TrimRightFunc(s, makeCutsetFunc(cutset)) -} - -// TrimSpace returns a slice of the string s, with all leading -// and trailing white space removed, as defined by Unicode. -func TrimSpace(s string) string { - return TrimFunc(s, unicode.IsSpace) -} - -// TrimPrefix returns s without the provided leading prefix string. -// If s doesn't start with prefix, s is returned unchanged. -func TrimPrefix(s, prefix string) string { - if HasPrefix(s, prefix) { - return s[len(prefix):] - } - return s -} - -// TrimSuffix returns s without the provided trailing suffix string. -// If s doesn't end with suffix, s is returned unchanged. -func TrimSuffix(s, suffix string) string { - if HasSuffix(s, suffix) { - return s[:len(s)-len(suffix)] - } - return s -} - -// Replace returns a copy of the string s with the first n -// non-overlapping instances of old replaced by new. -// If n < 0, there is no limit on the number of replacements. -func Replace(s, old, new string, n int) string { - if old == new || n == 0 { - return s // avoid allocation - } - - // Compute number of replacements. - if m := Count(s, old); m == 0 { - return s // avoid allocation - } else if n < 0 || m < n { - n = m - } - - // Apply replacements to buffer. - t := make([]byte, len(s)+n*(len(new)-len(old))) - w := 0 - start := 0 - for i := 0; i < n; i++ { - j := start - if len(old) == 0 { - if i > 0 { - _, wid := utf8.DecodeRuneInString(s[start:]) - j += wid - } - } else { - j += Index(s[start:], old) - } - w += copy(t[w:], s[start:j]) - w += copy(t[w:], new) - start = j + len(old) - } - w += copy(t[w:], s[start:]) - return string(t[0:w]) -} - -// EqualFold reports whether s and t, interpreted as UTF-8 strings, -// are equal under Unicode case-folding. -func EqualFold(s, t string) bool { - for s != "" && t != "" { - // Extract first rune from each string. - var sr, tr rune - if s[0] < utf8.RuneSelf { - sr, s = rune(s[0]), s[1:] - } else { - r, size := utf8.DecodeRuneInString(s) - sr, s = r, s[size:] - } - if t[0] < utf8.RuneSelf { - tr, t = rune(t[0]), t[1:] - } else { - r, size := utf8.DecodeRuneInString(t) - tr, t = r, t[size:] - } - - // If they match, keep going; if not, return false. - - // Easy case. - if tr == sr { - continue - } - - // Make sr < tr to simplify what follows. - if tr < sr { - tr, sr = sr, tr - } - // Fast check for ASCII. - if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { - // ASCII, and sr is upper case. tr must be lower case. 
- if tr == sr+'a'-'A' { - continue - } - return false - } - - // General case. SimpleFold(x) returns the next equivalent rune > x - // or wraps around to smaller values. - r := unicode.SimpleFold(sr) - for r != sr && r < tr { - r = unicode.SimpleFold(r) - } - if r == tr { - continue - } - return false - } - - // One string is empty. Are both? - return s == t -} diff --git a/src/pkg/strings/strings.s b/src/pkg/strings/strings.s deleted file mode 100644 index 55103bae0..000000000 --- a/src/pkg/strings/strings.s +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file is here just to make the go tool happy. diff --git a/src/pkg/strings/strings_decl.go b/src/pkg/strings/strings_decl.go deleted file mode 100644 index 810a696af..000000000 --- a/src/pkg/strings/strings_decl.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings - -// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. -func IndexByte(s string, c byte) int // ../runtime/asm_$GOARCH.s diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go deleted file mode 100644 index e40a18015..000000000 --- a/src/pkg/strings/strings_test.go +++ /dev/null @@ -1,1176 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strings_test - -import ( - "bytes" - "io" - "math/rand" - "reflect" - . "strings" - "testing" - "unicode" - "unicode/utf8" - "unsafe" -) - -func eq(a, b []string) bool { - if len(a) != len(b) { - return false - } - for i := 0; i < len(a); i++ { - if a[i] != b[i] { - return false - } - } - return true -} - -var abcd = "abcd" -var faces = "☺☻☹" -var commas = "1,2,3,4" -var dots = "1....2....3....4" - -type IndexTest struct { - s string - sep string - out int -} - -var indexTests = []IndexTest{ - {"", "", 0}, - {"", "a", -1}, - {"", "foo", -1}, - {"fo", "foo", -1}, - {"foo", "foo", 0}, - {"oofofoofooo", "f", 2}, - {"oofofoofooo", "foo", 4}, - {"barfoobarfoo", "foo", 3}, - {"foo", "", 0}, - {"foo", "o", 1}, - {"abcABCabc", "A", 3}, - // cases with one byte strings - test special case in Index() - {"", "a", -1}, - {"x", "a", -1}, - {"x", "x", 0}, - {"abc", "a", 0}, - {"abc", "b", 1}, - {"abc", "c", 2}, - {"abc", "x", -1}, -} - -var lastIndexTests = []IndexTest{ - {"", "", 0}, - {"", "a", -1}, - {"", "foo", -1}, - {"fo", "foo", -1}, - {"foo", "foo", 0}, - {"foo", "f", 0}, - {"oofofoofooo", "f", 7}, - {"oofofoofooo", "foo", 7}, - {"barfoobarfoo", "foo", 9}, - {"foo", "", 3}, - {"foo", "o", 2}, - {"abcABCabc", "A", 3}, - {"abcABCabc", "a", 6}, -} - -var indexAnyTests = []IndexTest{ - {"", "", -1}, - {"", "a", -1}, - {"", "abc", -1}, - {"a", "", -1}, - {"a", "a", 0}, - {"aaa", "a", 0}, - {"abc", "xyz", -1}, - {"abc", "xcz", 2}, - {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")}, - {"aRegExp*", ".(|)*+?^$[]", 7}, - {dots + dots + dots, " ", -1}, -} -var lastIndexAnyTests = []IndexTest{ - {"", "", -1}, - {"", "a", -1}, - {"", "abc", -1}, - {"a", "", -1}, - {"a", "a", 0}, - {"aaa", "a", 2}, - {"abc", "xyz", -1}, - {"abc", "ab", 1}, - {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")}, - {"a.RegExp*", ".(|)*+?^$[]", 8}, - {dots + dots + dots, " ", -1}, -} - -// Execute f on 
each test case. funcName should be the name of f; it's used -// in failure reports. -func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) { - for _, test := range testCases { - actual := f(test.s, test.sep) - if actual != test.out { - t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out) - } - } -} - -func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } -func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } -func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } -func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) } - -var indexRuneTests = []struct { - s string - rune rune - out int -}{ - {"a A x", 'A', 2}, - {"some_text=some_value", '=', 9}, - {"☺a", 'a', 3}, - {"a☻☺b", '☺', 4}, -} - -func TestIndexRune(t *testing.T) { - for _, test := range indexRuneTests { - if actual := IndexRune(test.s, test.rune); actual != test.out { - t.Errorf("IndexRune(%q,%d)= %v; want %v", test.s, test.rune, actual, test.out) - } - } -} - -const benchmarkString = "some_text=some☺value" - -func BenchmarkIndexRune(b *testing.B) { - if got := IndexRune(benchmarkString, '☺'); got != 14 { - b.Fatalf("wrong index: expected 14, got=%d", got) - } - for i := 0; i < b.N; i++ { - IndexRune(benchmarkString, '☺') - } -} - -func BenchmarkIndexRuneFastPath(b *testing.B) { - if got := IndexRune(benchmarkString, 'v'); got != 17 { - b.Fatalf("wrong index: expected 17, got=%d", got) - } - for i := 0; i < b.N; i++ { - IndexRune(benchmarkString, 'v') - } -} - -func BenchmarkIndex(b *testing.B) { - if got := Index(benchmarkString, "v"); got != 17 { - b.Fatalf("wrong index: expected 17, got=%d", got) - } - for i := 0; i < b.N; i++ { - Index(benchmarkString, "v") - } -} - -func BenchmarkIndexByte(b *testing.B) { - if got := IndexByte(benchmarkString, 'v'); got != 17 { - b.Fatalf("wrong index: expected 17, got=%d", got) - } - for i := 0; i < b.N; i++ { - IndexByte(benchmarkString, 'v') - } -} - -var explodetests = []struct { - s string - n int - a []string -}{ - {"", -1, []string{}}, - {abcd, 4, []string{"a", "b", "c", "d"}}, - {faces, 3, []string{"☺", "☻", "☹"}}, - {abcd, 2, []string{"a", "bcd"}}, -} - -func TestExplode(t *testing.T) { - for _, tt := range explodetests { - a := SplitN(tt.s, "", tt.n) - if !eq(a, tt.a) { - t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a) - continue - } - s := Join(a, "") - if s != tt.s { - t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s) - } - } -} - -type SplitTest struct { - s string - sep string - n int - a []string -} - -var splittests = []SplitTest{ - {abcd, "a", 0, nil}, - {abcd, "a", -1, []string{"", "bcd"}}, - {abcd, "z", -1, []string{"abcd"}}, - {abcd, "", -1, []string{"a", "b", "c", "d"}}, - {commas, ",", -1, []string{"1", "2", "3", "4"}}, - {dots, "...", -1, []string{"1", ".2", ".3", ".4"}}, - {faces, "☹", -1, []string{"☺☻", ""}}, - {faces, "~", -1, []string{faces}}, - {faces, "", -1, []string{"☺", "☻", "☹"}}, - {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}}, - {"1 2", " ", 3, []string{"1", "2"}}, - {"123", "", 2, []string{"1", "23"}}, - {"123", "", 17, []string{"1", "2", "3"}}, -} - -func TestSplit(t *testing.T) { - for _, tt := range splittests { - a := SplitN(tt.s, tt.sep, tt.n) - if !eq(a, tt.a) { - t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a) - continue - } - if tt.n == 0 { - continue - } - s := Join(a, tt.sep) - 
if s != tt.s { - t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s) - } - if tt.n < 0 { - b := Split(tt.s, tt.sep) - if !reflect.DeepEqual(a, b) { - t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) - } - } - } -} - -var splitaftertests = []SplitTest{ - {abcd, "a", -1, []string{"a", "bcd"}}, - {abcd, "z", -1, []string{"abcd"}}, - {abcd, "", -1, []string{"a", "b", "c", "d"}}, - {commas, ",", -1, []string{"1,", "2,", "3,", "4"}}, - {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}}, - {faces, "☹", -1, []string{"☺☻☹", ""}}, - {faces, "~", -1, []string{faces}}, - {faces, "", -1, []string{"☺", "☻", "☹"}}, - {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}}, - {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}}, - {"1 2", " ", 3, []string{"1 ", "2"}}, - {"123", "", 2, []string{"1", "23"}}, - {"123", "", 17, []string{"1", "2", "3"}}, -} - -func TestSplitAfter(t *testing.T) { - for _, tt := range splitaftertests { - a := SplitAfterN(tt.s, tt.sep, tt.n) - if !eq(a, tt.a) { - t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a) - continue - } - s := Join(a, "") - if s != tt.s { - t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s) - } - if tt.n < 0 { - b := SplitAfter(tt.s, tt.sep) - if !reflect.DeepEqual(a, b) { - t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) - } - } - } -} - -type FieldsTest struct { - s string - a []string -} - -var fieldstests = []FieldsTest{ - {"", []string{}}, - {" ", []string{}}, - {" \t ", []string{}}, - {" abc ", []string{"abc"}}, - {"1 2 3 4", []string{"1", "2", "3", "4"}}, - {"1 2 3 4", []string{"1", "2", "3", "4"}}, - {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}}, - {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}}, - {"\u2000\u2001\u2002", []string{}}, - {"\n™\t™\n", []string{"™", "™"}}, - {faces, []string{faces}}, -} - -func TestFields(t *testing.T) { - for _, tt := range fieldstests { - a := Fields(tt.s) - if !eq(a, tt.a) { - t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a) - continue - } - } -} - -var FieldsFuncTests = []FieldsTest{ - {"", []string{}}, - {"XX", []string{}}, - {"XXhiXXX", []string{"hi"}}, - {"aXXbXXXcX", []string{"a", "b", "c"}}, -} - -func TestFieldsFunc(t *testing.T) { - for _, tt := range fieldstests { - a := FieldsFunc(tt.s, unicode.IsSpace) - if !eq(a, tt.a) { - t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a) - continue - } - } - pred := func(c rune) bool { return c == 'X' } - for _, tt := range FieldsFuncTests { - a := FieldsFunc(tt.s, pred) - if !eq(a, tt.a) { - t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) - } - } -} - -// Test case for any function which accepts and returns a single string. -type StringTest struct { - in, out string -} - -// Execute f on each test case. funcName should be the name of f; it's used -// in failure reports. 
-func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) { - for _, tc := range testCases { - actual := f(tc.in) - if actual != tc.out { - t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out) - } - } -} - -var upperTests = []StringTest{ - {"", ""}, - {"abc", "ABC"}, - {"AbC123", "ABC123"}, - {"azAZ09_", "AZAZ09_"}, - {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char -} - -var lowerTests = []StringTest{ - {"", ""}, - {"abc", "abc"}, - {"AbC123", "abc123"}, - {"azAZ09_", "azaz09_"}, - {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char -} - -const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" - -var trimSpaceTests = []StringTest{ - {"", ""}, - {"abc", "abc"}, - {space + "abc" + space, "abc"}, - {" ", ""}, - {" \t\r\n \t\t\r\r\n\n ", ""}, - {" \t\r\n x\t\t\r\r\n\n ", "x"}, - {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"}, - {"1 \t\r\n2", "1 \t\r\n2"}, - {" x\x80", "x\x80"}, - {" x\xc0", "x\xc0"}, - {"x \xc0\xc0 ", "x \xc0\xc0"}, - {"x \xc0", "x \xc0"}, - {"x \xc0 ", "x \xc0"}, - {"x \xc0\xc0 ", "x \xc0\xc0"}, - {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"}, - {"x ☺ ", "x ☺"}, -} - -func tenRunes(ch rune) string { - r := make([]rune, 10) - for i := range r { - r[i] = ch - } - return string(r) -} - -// User-defined self-inverse mapping function -func rot13(r rune) rune { - step := rune(13) - if r >= 'a' && r <= 'z' { - return ((r - 'a' + step) % 26) + 'a' - } - if r >= 'A' && r <= 'Z' { - return ((r - 'A' + step) % 26) + 'A' - } - return r -} - -func TestMap(t *testing.T) { - // Run a couple of awful growth/shrinkage tests - a := tenRunes('a') - // 1. Grow. This triggers two reallocations in Map. - maxRune := func(rune) rune { return unicode.MaxRune } - m := Map(maxRune, a) - expect := tenRunes(unicode.MaxRune) - if m != expect { - t.Errorf("growing: expected %q got %q", expect, m) - } - - // 2. Shrink - minRune := func(rune) rune { return 'a' } - m = Map(minRune, tenRunes(unicode.MaxRune)) - expect = a - if m != expect { - t.Errorf("shrinking: expected %q got %q", expect, m) - } - - // 3. Rot13 - m = Map(rot13, "a to zed") - expect = "n gb mrq" - if m != expect { - t.Errorf("rot13: expected %q got %q", expect, m) - } - - // 4. Rot13^2 - m = Map(rot13, Map(rot13, "a to zed")) - expect = "a to zed" - if m != expect { - t.Errorf("rot13: expected %q got %q", expect, m) - } - - // 5. Drop - dropNotLatin := func(r rune) rune { - if unicode.Is(unicode.Latin, r) { - return r - } - return -1 - } - m = Map(dropNotLatin, "Hello, 세계") - expect = "Hello" - if m != expect { - t.Errorf("drop: expected %q got %q", expect, m) - } - - // 6. Identity - identity := func(r rune) rune { - return r - } - orig := "Input string that we expect not to be copied." 
- m = Map(identity, orig) - if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data != - (*reflect.StringHeader)(unsafe.Pointer(&m)).Data { - t.Error("unexpected copy during identity map") - } -} - -func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } - -func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } - -func BenchmarkMapNoChanges(b *testing.B) { - identity := func(r rune) rune { - return r - } - for i := 0; i < b.N; i++ { - Map(identity, "Some string that won't be modified.") - } -} - -func TestSpecialCase(t *testing.T) { - lower := "abcçdefgğhıijklmnoöprsştuüvyz" - upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ" - u := ToUpperSpecial(unicode.TurkishCase, upper) - if u != upper { - t.Errorf("Upper(upper) is %s not %s", u, upper) - } - u = ToUpperSpecial(unicode.TurkishCase, lower) - if u != upper { - t.Errorf("Upper(lower) is %s not %s", u, upper) - } - l := ToLowerSpecial(unicode.TurkishCase, lower) - if l != lower { - t.Errorf("Lower(lower) is %s not %s", l, lower) - } - l = ToLowerSpecial(unicode.TurkishCase, upper) - if l != lower { - t.Errorf("Lower(upper) is %s not %s", l, lower) - } -} - -func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } - -var trimTests = []struct { - f string - in, arg, out string -}{ - {"Trim", "abba", "a", "bb"}, - {"Trim", "abba", "ab", ""}, - {"TrimLeft", "abba", "ab", ""}, - {"TrimRight", "abba", "ab", ""}, - {"TrimLeft", "abba", "a", "bba"}, - {"TrimRight", "abba", "a", "abb"}, - {"Trim", "<tag>", "<>", "tag"}, - {"Trim", "* listitem", " *", "listitem"}, - {"Trim", `"quote"`, `"`, "quote"}, - {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"}, - //empty string tests - {"Trim", "abba", "", "abba"}, - {"Trim", "", "123", ""}, - {"Trim", "", "", ""}, - {"TrimLeft", "abba", "", "abba"}, - {"TrimLeft", "", "123", ""}, - {"TrimLeft", "", "", ""}, - {"TrimRight", "abba", "", "abba"}, - {"TrimRight", "", "123", ""}, - {"TrimRight", "", "", ""}, - {"TrimRight", "☺\xc0", "☺", "☺\xc0"}, - {"TrimPrefix", "aabb", "a", "abb"}, - {"TrimPrefix", "aabb", "b", "aabb"}, - {"TrimSuffix", "aabb", "a", "aabb"}, - {"TrimSuffix", "aabb", "b", "aab"}, -} - -func TestTrim(t *testing.T) { - for _, tc := range trimTests { - name := tc.f - var f func(string, string) string - switch name { - case "Trim": - f = Trim - case "TrimLeft": - f = TrimLeft - case "TrimRight": - f = TrimRight - case "TrimPrefix": - f = TrimPrefix - case "TrimSuffix": - f = TrimSuffix - default: - t.Errorf("Undefined trim function %s", name) - } - actual := f(tc.in, tc.arg) - if actual != tc.out { - t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out) - } - } -} - -type predicate struct { - f func(rune) bool - name string -} - -var isSpace = predicate{unicode.IsSpace, "IsSpace"} -var isDigit = predicate{unicode.IsDigit, "IsDigit"} -var isUpper = predicate{unicode.IsUpper, "IsUpper"} -var isValidRune = predicate{ - func(r rune) bool { - return r != utf8.RuneError - }, - "IsValidRune", -} - -func not(p predicate) predicate { - return predicate{ - func(r rune) bool { - return !p.f(r) - }, - "not " + p.name, - } -} - -var trimFuncTests = []struct { - f predicate - in, out string -}{ - {isSpace, space + " hello " + space, "hello"}, - {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"}, - {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"}, - {not(isSpace), "hello" + space + "hello", space}, - {not(isDigit), 
"hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"}, - {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"}, - {not(isValidRune), "\xc0a\xc0", "a"}, -} - -func TestTrimFunc(t *testing.T) { - for _, tc := range trimFuncTests { - actual := TrimFunc(tc.in, tc.f.f) - if actual != tc.out { - t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out) - } - } -} - -var indexFuncTests = []struct { - in string - f predicate - first, last int -}{ - {"", isValidRune, -1, -1}, - {"abc", isDigit, -1, -1}, - {"0123", isDigit, 0, 3}, - {"a1b", isDigit, 1, 1}, - {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes - {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18}, - {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34}, - {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12}, - - // tests of invalid UTF-8 - {"\x801", isDigit, 1, 1}, - {"\x80abc", isDigit, -1, -1}, - {"\xc0a\xc0", isValidRune, 1, 1}, - {"\xc0a\xc0", not(isValidRune), 0, 2}, - {"\xc0☺\xc0", not(isValidRune), 0, 4}, - {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5}, - {"ab\xc0a\xc0cd", not(isValidRune), 2, 4}, - {"a\xe0\x80cd", not(isValidRune), 1, 2}, - {"\x80\x80\x80\x80", not(isValidRune), 0, 3}, -} - -func TestIndexFunc(t *testing.T) { - for _, tc := range indexFuncTests { - first := IndexFunc(tc.in, tc.f.f) - if first != tc.first { - t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first) - } - last := LastIndexFunc(tc.in, tc.f.f) - if last != tc.last { - t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last) - } - } -} - -func equal(m string, s1, s2 string, t *testing.T) bool { - if s1 == s2 { - return true - } - e1 := Split(s1, "") - e2 := Split(s2, "") - for i, c1 := range e1 { - if i >= len(e2) { - break - } - r1, _ := utf8.DecodeRuneInString(c1) - r2, _ := utf8.DecodeRuneInString(e2[i]) - if r1 != r2 { - t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2) - } - } - return false -} - -func TestCaseConsistency(t *testing.T) { - // Make a string of all the runes. - numRunes := int(unicode.MaxRune + 1) - if testing.Short() { - numRunes = 1000 - } - a := make([]rune, numRunes) - for i := range a { - a[i] = rune(i) - } - s := string(a) - // convert the cases. - upper := ToUpper(s) - lower := ToLower(s) - - // Consistency checks - if n := utf8.RuneCountInString(upper); n != numRunes { - t.Error("rune count wrong in upper:", n) - } - if n := utf8.RuneCountInString(lower); n != numRunes { - t.Error("rune count wrong in lower:", n) - } - if !equal("ToUpper(upper)", ToUpper(upper), upper, t) { - t.Error("ToUpper(upper) consistency fail") - } - if !equal("ToLower(lower)", ToLower(lower), lower, t) { - t.Error("ToLower(lower) consistency fail") - } - /* - These fail because of non-one-to-oneness of the data, such as multiple - upper case 'I' mapping to 'i'. We comment them out but keep them for - interest. 
- For instance: CAPITAL LETTER I WITH DOT ABOVE: - unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130' - - if !equal("ToUpper(lower)", ToUpper(lower), upper, t) { - t.Error("ToUpper(lower) consistency fail"); - } - if !equal("ToLower(upper)", ToLower(upper), lower, t) { - t.Error("ToLower(upper) consistency fail"); - } - */ -} - -var RepeatTests = []struct { - in, out string - count int -}{ - {"", "", 0}, - {"", "", 1}, - {"", "", 2}, - {"-", "", 0}, - {"-", "-", 1}, - {"-", "----------", 10}, - {"abc ", "abc abc abc ", 3}, -} - -func TestRepeat(t *testing.T) { - for _, tt := range RepeatTests { - a := Repeat(tt.in, tt.count) - if !equal("Repeat(s)", a, tt.out, t) { - t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out) - continue - } - } -} - -func runesEqual(a, b []rune) bool { - if len(a) != len(b) { - return false - } - for i, r := range a { - if r != b[i] { - return false - } - } - return true -} - -var RunesTests = []struct { - in string - out []rune - lossy bool -}{ - {"", []rune{}, false}, - {" ", []rune{32}, false}, - {"ABC", []rune{65, 66, 67}, false}, - {"abc", []rune{97, 98, 99}, false}, - {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, - {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, - {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, -} - -func TestRunes(t *testing.T) { - for _, tt := range RunesTests { - a := []rune(tt.in) - if !runesEqual(a, tt.out) { - t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out) - continue - } - if !tt.lossy { - // can only test reassembly if we didn't lose information - s := string(a) - if s != tt.in { - t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in) - } - } - } -} - -func TestReadByte(t *testing.T) { - testStrings := []string{"", abcd, faces, commas} - for _, s := range testStrings { - reader := NewReader(s) - if e := reader.UnreadByte(); e == nil { - t.Errorf("Unreading %q at beginning: expected error", s) - } - var res bytes.Buffer - for { - b, e := reader.ReadByte() - if e == io.EOF { - break - } - if e != nil { - t.Errorf("Reading %q: %s", s, e) - break - } - res.WriteByte(b) - // unread and read again - e = reader.UnreadByte() - if e != nil { - t.Errorf("Unreading %q: %s", s, e) - break - } - b1, e := reader.ReadByte() - if e != nil { - t.Errorf("Reading %q after unreading: %s", s, e) - break - } - if b1 != b { - t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1) - break - } - } - if res.String() != s { - t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String()) - } - } -} - -func TestReadRune(t *testing.T) { - testStrings := []string{"", abcd, faces, commas} - for _, s := range testStrings { - reader := NewReader(s) - if e := reader.UnreadRune(); e == nil { - t.Errorf("Unreading %q at beginning: expected error", s) - } - res := "" - for { - r, z, e := reader.ReadRune() - if e == io.EOF { - break - } - if e != nil { - t.Errorf("Reading %q: %s", s, e) - break - } - res += string(r) - // unread and read again - e = reader.UnreadRune() - if e != nil { - t.Errorf("Unreading %q: %s", s, e) - break - } - r1, z1, e := reader.ReadRune() - if e != nil { - t.Errorf("Reading %q after unreading: %s", s, e) - break - } - if r1 != r { - t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1) - break - } - if z1 != z { - t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1) - break - } - } - if res != s { - t.Errorf("Reader(%q).ReadRune() produced %q", s, res) - } - } -} - -var UnreadRuneErrorTests = []struct { - name string - f func(*Reader) -}{ - 
{"Read", func(r *Reader) { r.Read([]byte{0}) }}, - {"ReadByte", func(r *Reader) { r.ReadByte() }}, - {"UnreadRune", func(r *Reader) { r.UnreadRune() }}, - {"Seek", func(r *Reader) { r.Seek(0, 1) }}, - {"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }}, -} - -func TestUnreadRuneError(t *testing.T) { - for _, tt := range UnreadRuneErrorTests { - reader := NewReader("0123456789") - if _, _, err := reader.ReadRune(); err != nil { - // should not happen - t.Fatal(err) - } - tt.f(reader) - err := reader.UnreadRune() - if err == nil { - t.Errorf("Unreading after %s: expected error", tt.name) - } - } -} - -var ReplaceTests = []struct { - in string - old, new string - n int - out string -}{ - {"hello", "l", "L", 0, "hello"}, - {"hello", "l", "L", -1, "heLLo"}, - {"hello", "x", "X", -1, "hello"}, - {"", "x", "X", -1, ""}, - {"radar", "r", "<r>", -1, "<r>ada<r>"}, - {"", "", "<>", -1, "<>"}, - {"banana", "a", "<>", -1, "b<>n<>n<>"}, - {"banana", "a", "<>", 1, "b<>nana"}, - {"banana", "a", "<>", 1000, "b<>n<>n<>"}, - {"banana", "an", "<>", -1, "b<><>a"}, - {"banana", "ana", "<>", -1, "b<>na"}, - {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"}, - {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"}, - {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"}, - {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"}, - {"banana", "", "<>", 1, "<>banana"}, - {"banana", "a", "a", -1, "banana"}, - {"banana", "a", "a", 1, "banana"}, - {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"}, -} - -func TestReplace(t *testing.T) { - for _, tt := range ReplaceTests { - if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out { - t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out) - } - } -} - -var TitleTests = []struct { - in, out string -}{ - {"", ""}, - {"a", "A"}, - {" aaa aaa aaa ", " Aaa Aaa Aaa "}, - {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "}, - {"123a456", "123a456"}, - {"double-blind", "Double-Blind"}, - {"ÿøû", "Ÿøû"}, - {"with_underscore", "With_underscore"}, - {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"}, -} - -func TestTitle(t *testing.T) { - for _, tt := range TitleTests { - if s := Title(tt.in); s != tt.out { - t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out) - } - } -} - -var ContainsTests = []struct { - str, substr string - expected bool -}{ - {"abc", "bc", true}, - {"abc", "bcd", false}, - {"abc", "", true}, - {"", "a", false}, -} - -func TestContains(t *testing.T) { - for _, ct := range ContainsTests { - if Contains(ct.str, ct.substr) != ct.expected { - t.Errorf("Contains(%s, %s) = %v, want %v", - ct.str, ct.substr, !ct.expected, ct.expected) - } - } -} - -var ContainsAnyTests = []struct { - str, substr string - expected bool -}{ - {"", "", false}, - {"", "a", false}, - {"", "abc", false}, - {"a", "", false}, - {"a", "a", true}, - {"aaa", "a", true}, - {"abc", "xyz", false}, - {"abc", "xcz", true}, - {"a☺b☻c☹d", "uvw☻xyz", true}, - {"aRegExp*", ".(|)*+?^$[]", true}, - {dots + dots + dots, " ", false}, -} - -func TestContainsAny(t *testing.T) { - for _, ct := range ContainsAnyTests { - if ContainsAny(ct.str, ct.substr) != ct.expected { - t.Errorf("ContainsAny(%s, %s) = %v, want %v", - ct.str, ct.substr, !ct.expected, ct.expected) - } - } -} - -var ContainsRuneTests = []struct { - str string - r rune - expected bool -}{ - {"", 'a', false}, - {"a", 'a', true}, - {"aaa", 'a', true}, - {"abc", 'y', false}, - {"abc", 'c', true}, - {"a☺b☻c☹d", 'x', false}, - {"a☺b☻c☹d", '☻', true}, - {"aRegExp*", '*', true}, -} - -func TestContainsRune(t *testing.T) { - for _, ct := 
range ContainsRuneTests { - if ContainsRune(ct.str, ct.r) != ct.expected { - t.Errorf("ContainsRune(%q, %q) = %v, want %v", - ct.str, ct.r, !ct.expected, ct.expected) - } - } -} - -var EqualFoldTests = []struct { - s, t string - out bool -}{ - {"abc", "abc", true}, - {"ABcd", "ABcd", true}, - {"123abc", "123ABC", true}, - {"αβδ", "ΑΒΔ", true}, - {"abc", "xyz", false}, - {"abc", "XYZ", false}, - {"abcdefghijk", "abcdefghijX", false}, - {"abcdefghijk", "abcdefghij\u212A", true}, - {"abcdefghijK", "abcdefghij\u212A", true}, - {"abcdefghijkz", "abcdefghij\u212Ay", false}, - {"abcdefghijKz", "abcdefghij\u212Ay", false}, -} - -func TestEqualFold(t *testing.T) { - for _, tt := range EqualFoldTests { - if out := EqualFold(tt.s, tt.t); out != tt.out { - t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out) - } - if out := EqualFold(tt.t, tt.s); out != tt.out { - t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out) - } - } -} - -var CountTests = []struct { - s, sep string - num int -}{ - {"", "", 1}, - {"", "notempty", 0}, - {"notempty", "", 9}, - {"smaller", "not smaller", 0}, - {"12345678987654321", "6", 2}, - {"611161116", "6", 3}, - {"notequal", "NotEqual", 0}, - {"equal", "equal", 1}, - {"abc1231231123q", "123", 3}, - {"11111", "11", 2}, -} - -func TestCount(t *testing.T) { - for _, tt := range CountTests { - if num := Count(tt.s, tt.sep); num != tt.num { - t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num) - } - } -} - -func makeBenchInputHard() string { - tokens := [...]string{ - "<a>", "<p>", "<b>", "<strong>", - "</a>", "</p>", "</b>", "</strong>", - "hello", "world", - } - x := make([]byte, 0, 1<<20) - for len(x) < 1<<20 { - i := rand.Intn(len(tokens)) - x = append(x, tokens[i]...) - } - return string(x) -} - -var benchInputHard = makeBenchInputHard() - -func benchmarkIndexHard(b *testing.B, sep string) { - for i := 0; i < b.N; i++ { - Index(benchInputHard, sep) - } -} - -func benchmarkCountHard(b *testing.B, sep string) { - for i := 0; i < b.N; i++ { - Count(benchInputHard, sep) - } -} - -func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } -func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } -func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } - -func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") } -func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") } -func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") } - -var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10) -var benchNeedleTorture = Repeat("ABC", 1<<10+1) - -func BenchmarkIndexTorture(b *testing.B) { - for i := 0; i < b.N; i++ { - Index(benchInputTorture, benchNeedleTorture) - } -} - -func BenchmarkCountTorture(b *testing.B) { - for i := 0; i < b.N; i++ { - Count(benchInputTorture, benchNeedleTorture) - } -} - -func BenchmarkCountTortureOverlapping(b *testing.B) { - A := Repeat("ABC", 1<<20) - B := Repeat("ABC", 1<<10) - for i := 0; i < b.N; i++ { - Count(A, B) - } -} - -var makeFieldsInput = func() string { - x := make([]byte, 1<<20) - // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space. 
- for i := range x { - switch rand.Intn(10) { - case 0: - x[i] = ' ' - case 1: - if i > 0 && x[i-1] == 'x' { - copy(x[i-1:], "χ") - break - } - fallthrough - default: - x[i] = 'x' - } - } - return string(x) -} - -var fieldsInput = makeFieldsInput() - -func BenchmarkFields(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - Fields(fieldsInput) - } -} - -func BenchmarkFieldsFunc(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - FieldsFunc(fieldsInput, unicode.IsSpace) - } -} - -func BenchmarkSplit1(b *testing.B) { - for i := 0; i < b.N; i++ { - Split(benchInputHard, "") - } -} - -func BenchmarkSplit2(b *testing.B) { - for i := 0; i < b.N; i++ { - Split(benchInputHard, "/") - } -} - -func BenchmarkSplit3(b *testing.B) { - for i := 0; i < b.N; i++ { - Split(benchInputHard, "hello") - } -} |
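
The deleted Index and Count above share a Rabin-Karp scan: hashstr (defined earlier in strings.go, outside this excerpt) hashes the pattern once and returns the factor needed to drop the oldest byte, and the main loop maintains a rolling hash of the current window, falling back to a direct string comparison on every hash hit. The standalone sketch below re-creates that scan for illustration only; the name rkIndex and the FNV-style prime are assumptions of this sketch, not identifiers taken from the deleted file.

package main

import "fmt"

// prime is a stand-in for the package's primeRK constant, which is not
// shown in this excerpt; the FNV prime is assumed here for illustration.
const prime = 16777619

// hash returns the hash of sep and pow = prime**len(sep), the factor used
// to remove the window's oldest byte when the hash is rolled forward.
func hash(sep string) (h, pow uint32) {
	for i := 0; i < len(sep); i++ {
		h = h*prime + uint32(sep[i])
	}
	pow = 1
	for i := 0; i < len(sep); i++ {
		pow *= prime
	}
	return h, pow
}

// rkIndex returns the index of the first occurrence of sep in s, or -1,
// using the same rolling-hash idea as the deleted Index.
func rkIndex(s, sep string) int {
	n := len(sep)
	if n == 0 {
		return 0
	}
	if n > len(s) {
		return -1
	}
	hashsep, pow := hash(sep)
	var h uint32
	for i := 0; i < n; i++ {
		h = h*prime + uint32(s[i])
	}
	if h == hashsep && s[:n] == sep {
		return 0
	}
	for i := n; i < len(s); i++ {
		// Slide the window one byte: add s[i], drop s[i-n].
		h = h*prime + uint32(s[i]) - pow*uint32(s[i-n])
		// Hash equality is only a candidate; confirm with a real comparison.
		if h == hashsep && s[i+1-n:i+1] == sep {
			return i + 1 - n
		}
	}
	return -1
}

func main() {
	fmt.Println(rkIndex("oofofoofooo", "foo")) // 4
	fmt.Println(rkIndex("chicken", "dmr"))     // -1
}

The two calls in main mirror entries from the indexTests table in strings_test.go above ({"oofofoofooo", "foo", 4} and the ExampleIndex "dmr" case), so the sketch can be checked against the same expectations as the deleted Index.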