path: root/src/pkg/strings
Diffstat (limited to 'src/pkg/strings')
-rw-r--r--  src/pkg/strings/example_test.go    225
-rw-r--r--  src/pkg/strings/export_test.go      45
-rw-r--r--  src/pkg/strings/reader.go          144
-rw-r--r--  src/pkg/strings/reader_test.go     159
-rw-r--r--  src/pkg/strings/replace.go         549
-rw-r--r--  src/pkg/strings/replace_test.go    506
-rw-r--r--  src/pkg/strings/search.go          124
-rw-r--r--  src/pkg/strings/search_test.go      90
-rw-r--r--  src/pkg/strings/strings.go         725
-rw-r--r--  src/pkg/strings/strings.s            5
-rw-r--r--  src/pkg/strings/strings_decl.go      8
-rw-r--r--  src/pkg/strings/strings_test.go   1176
12 files changed, 0 insertions, 3756 deletions
diff --git a/src/pkg/strings/example_test.go b/src/pkg/strings/example_test.go
deleted file mode 100644
index 7243e16b1..000000000
--- a/src/pkg/strings/example_test.go
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings_test
-
-import (
- "fmt"
- "strings"
- "unicode"
-)
-
-func ExampleFields() {
- fmt.Printf("Fields are: %q", strings.Fields(" foo bar baz "))
- // Output: Fields are: ["foo" "bar" "baz"]
-}
-
-func ExampleFieldsFunc() {
- f := func(c rune) bool {
- return !unicode.IsLetter(c) && !unicode.IsNumber(c)
- }
- fmt.Printf("Fields are: %q", strings.FieldsFunc(" foo1;bar2,baz3...", f))
- // Output: Fields are: ["foo1" "bar2" "baz3"]
-}
-
-func ExampleContains() {
- fmt.Println(strings.Contains("seafood", "foo"))
- fmt.Println(strings.Contains("seafood", "bar"))
- fmt.Println(strings.Contains("seafood", ""))
- fmt.Println(strings.Contains("", ""))
- // Output:
- // true
- // false
- // true
- // true
-}
-
-func ExampleContainsAny() {
- fmt.Println(strings.ContainsAny("team", "i"))
- fmt.Println(strings.ContainsAny("failure", "u & i"))
- fmt.Println(strings.ContainsAny("foo", ""))
- fmt.Println(strings.ContainsAny("", ""))
- // Output:
- // false
- // true
- // false
- // false
-}
-
-func ExampleCount() {
- fmt.Println(strings.Count("cheese", "e"))
- fmt.Println(strings.Count("five", "")) // before & after each rune
- // Output:
- // 3
- // 5
-}
-
-func ExampleEqualFold() {
- fmt.Println(strings.EqualFold("Go", "go"))
- // Output: true
-}
-
-func ExampleIndex() {
- fmt.Println(strings.Index("chicken", "ken"))
- fmt.Println(strings.Index("chicken", "dmr"))
- // Output:
- // 4
- // -1
-}
-
-func ExampleIndexFunc() {
- f := func(c rune) bool {
- return unicode.Is(unicode.Han, c)
- }
- fmt.Println(strings.IndexFunc("Hello, 世界", f))
- fmt.Println(strings.IndexFunc("Hello, world", f))
- // Output:
- // 7
- // -1
-}
-
-func ExampleIndexAny() {
- fmt.Println(strings.IndexAny("chicken", "aeiouy"))
- fmt.Println(strings.IndexAny("crwth", "aeiouy"))
- // Output:
- // 2
- // -1
-}
-
-func ExampleIndexRune() {
- fmt.Println(strings.IndexRune("chicken", 'k'))
- fmt.Println(strings.IndexRune("chicken", 'd'))
- // Output:
- // 4
- // -1
-}
-
-func ExampleLastIndex() {
- fmt.Println(strings.Index("go gopher", "go"))
- fmt.Println(strings.LastIndex("go gopher", "go"))
- fmt.Println(strings.LastIndex("go gopher", "rodent"))
- // Output:
- // 0
- // 3
- // -1
-}
-
-func ExampleJoin() {
- s := []string{"foo", "bar", "baz"}
- fmt.Println(strings.Join(s, ", "))
- // Output: foo, bar, baz
-}
-
-func ExampleRepeat() {
- fmt.Println("ba" + strings.Repeat("na", 2))
- // Output: banana
-}
-
-func ExampleReplace() {
- fmt.Println(strings.Replace("oink oink oink", "k", "ky", 2))
- fmt.Println(strings.Replace("oink oink oink", "oink", "moo", -1))
- // Output:
- // oinky oinky oink
- // moo moo moo
-}
-
-func ExampleSplit() {
- fmt.Printf("%q\n", strings.Split("a,b,c", ","))
- fmt.Printf("%q\n", strings.Split("a man a plan a canal panama", "a "))
- fmt.Printf("%q\n", strings.Split(" xyz ", ""))
- fmt.Printf("%q\n", strings.Split("", "Bernardo O'Higgins"))
- // Output:
- // ["a" "b" "c"]
- // ["" "man " "plan " "canal panama"]
- // [" " "x" "y" "z" " "]
- // [""]
-}
-
-func ExampleSplitN() {
- fmt.Printf("%q\n", strings.SplitN("a,b,c", ",", 2))
- z := strings.SplitN("a,b,c", ",", 0)
- fmt.Printf("%q (nil = %v)\n", z, z == nil)
- // Output:
- // ["a" "b,c"]
- // [] (nil = true)
-}
-
-func ExampleSplitAfter() {
- fmt.Printf("%q\n", strings.SplitAfter("a,b,c", ","))
- // Output: ["a," "b," "c"]
-}
-
-func ExampleSplitAfterN() {
- fmt.Printf("%q\n", strings.SplitAfterN("a,b,c", ",", 2))
- // Output: ["a," "b,c"]
-}
-
-func ExampleTitle() {
- fmt.Println(strings.Title("her royal highness"))
- // Output: Her Royal Highness
-}
-
-func ExampleToTitle() {
- fmt.Println(strings.ToTitle("loud noises"))
- fmt.Println(strings.ToTitle("хлеб"))
- // Output:
- // LOUD NOISES
- // ХЛЕБ
-}
-
-func ExampleTrim() {
- fmt.Printf("[%q]", strings.Trim(" !!! Achtung! Achtung! !!! ", "! "))
- // Output: ["Achtung! Achtung"]
-}
-
-func ExampleMap() {
- rot13 := func(r rune) rune {
- switch {
- case r >= 'A' && r <= 'Z':
- return 'A' + (r-'A'+13)%26
- case r >= 'a' && r <= 'z':
- return 'a' + (r-'a'+13)%26
- }
- return r
- }
- fmt.Println(strings.Map(rot13, "'Twas brillig and the slithy gopher..."))
- // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
-}
-
-func ExampleTrimSpace() {
- fmt.Println(strings.TrimSpace(" \t\n a lone gopher \n\t\r\n"))
- // Output: a lone gopher
-}
-
-func ExampleNewReplacer() {
- r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
- fmt.Println(r.Replace("This is <b>HTML</b>!"))
- // Output: This is &lt;b&gt;HTML&lt;/b&gt;!
-}
-
-func ExampleToUpper() {
- fmt.Println(strings.ToUpper("Gopher"))
- // Output: GOPHER
-}
-
-func ExampleToLower() {
- fmt.Println(strings.ToLower("Gopher"))
- // Output: gopher
-}
-
-func ExampleTrimSuffix() {
- var s = "Hello, goodbye, etc!"
- s = strings.TrimSuffix(s, "goodbye, etc!")
- s = strings.TrimSuffix(s, "planet")
- fmt.Print(s, "world!")
- // Output: Hello, world!
-}
-
-func ExampleTrimPrefix() {
- var s = "Goodbye,, world!"
- s = strings.TrimPrefix(s, "Goodbye,")
- s = strings.TrimPrefix(s, "Howdy,")
- fmt.Print("Hello" + s)
- // Output: Hello, world!
-}
diff --git a/src/pkg/strings/export_test.go b/src/pkg/strings/export_test.go
deleted file mode 100644
index 17c806aa5..000000000
--- a/src/pkg/strings/export_test.go
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-func (r *Replacer) Replacer() interface{} {
- return r.r
-}
-
-func (r *Replacer) PrintTrie() string {
- gen := r.r.(*genericReplacer)
- return gen.printNode(&gen.root, 0)
-}
-
-func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) {
- if t.priority > 0 {
- s += "+"
- } else {
- s += "-"
- }
- s += "\n"
-
- if t.prefix != "" {
- s += Repeat(".", depth) + t.prefix
- s += r.printNode(t.next, depth+len(t.prefix))
- } else if t.table != nil {
- for b, m := range r.mapping {
- if int(m) != r.tableSize && t.table[m] != nil {
- s += Repeat(".", depth) + string([]byte{byte(b)})
- s += r.printNode(t.table[m], depth+1)
- }
- }
- }
- return
-}
-
-func StringFind(pattern, text string) int {
- return makeStringFinder(pattern).next(text)
-}
-
-func DumpTables(pattern string) ([]int, []int) {
- finder := makeStringFinder(pattern)
- return finder.badCharSkip[:], finder.goodSuffixSkip
-}
diff --git a/src/pkg/strings/reader.go b/src/pkg/strings/reader.go
deleted file mode 100644
index 82df97439..000000000
--- a/src/pkg/strings/reader.go
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-import (
- "errors"
- "io"
- "unicode/utf8"
-)
-
-// A Reader implements the io.Reader, io.ReaderAt, io.Seeker, io.WriterTo,
-// io.ByteScanner, and io.RuneScanner interfaces by reading
-// from a string.
-type Reader struct {
- s string
- i int64 // current reading index
- prevRune int // index of previous rune; or < 0
-}
-
-// Len returns the number of bytes of the unread portion of the
-// string.
-func (r *Reader) Len() int {
- if r.i >= int64(len(r.s)) {
- return 0
- }
- return int(int64(len(r.s)) - r.i)
-}
-
-func (r *Reader) Read(b []byte) (n int, err error) {
- if len(b) == 0 {
- return 0, nil
- }
- if r.i >= int64(len(r.s)) {
- return 0, io.EOF
- }
- r.prevRune = -1
- n = copy(b, r.s[r.i:])
- r.i += int64(n)
- return
-}
-
-func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) {
- // cannot modify state - see io.ReaderAt
- if off < 0 {
- return 0, errors.New("strings.Reader.ReadAt: negative offset")
- }
- if off >= int64(len(r.s)) {
- return 0, io.EOF
- }
- n = copy(b, r.s[off:])
- if n < len(b) {
- err = io.EOF
- }
- return
-}
-
-func (r *Reader) ReadByte() (b byte, err error) {
- r.prevRune = -1
- if r.i >= int64(len(r.s)) {
- return 0, io.EOF
- }
- b = r.s[r.i]
- r.i++
- return
-}
-
-func (r *Reader) UnreadByte() error {
- r.prevRune = -1
- if r.i <= 0 {
- return errors.New("strings.Reader.UnreadByte: at beginning of string")
- }
- r.i--
- return nil
-}
-
-func (r *Reader) ReadRune() (ch rune, size int, err error) {
- if r.i >= int64(len(r.s)) {
- r.prevRune = -1
- return 0, 0, io.EOF
- }
- r.prevRune = int(r.i)
- if c := r.s[r.i]; c < utf8.RuneSelf {
- r.i++
- return rune(c), 1, nil
- }
- ch, size = utf8.DecodeRuneInString(r.s[r.i:])
- r.i += int64(size)
- return
-}
-
-func (r *Reader) UnreadRune() error {
- if r.prevRune < 0 {
- return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune")
- }
- r.i = int64(r.prevRune)
- r.prevRune = -1
- return nil
-}
-
-// Seek implements the io.Seeker interface.
-func (r *Reader) Seek(offset int64, whence int) (int64, error) {
- r.prevRune = -1
- var abs int64
- switch whence {
- case 0:
- abs = offset
- case 1:
- abs = int64(r.i) + offset
- case 2:
- abs = int64(len(r.s)) + offset
- default:
- return 0, errors.New("strings.Reader.Seek: invalid whence")
- }
- if abs < 0 {
- return 0, errors.New("strings.Reader.Seek: negative position")
- }
- r.i = abs
- return abs, nil
-}
-
-// WriteTo implements the io.WriterTo interface.
-func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
- r.prevRune = -1
- if r.i >= int64(len(r.s)) {
- return 0, nil
- }
- s := r.s[r.i:]
- m, err := io.WriteString(w, s)
- if m > len(s) {
- panic("strings.Reader.WriteTo: invalid WriteString count")
- }
- r.i += int64(m)
- n = int64(m)
- if m != len(s) && err == nil {
- err = io.ErrShortWrite
- }
- return
-}
-
-// NewReader returns a new Reader reading from s.
-// It is similar to bytes.NewBufferString but more efficient and read-only.
-func NewReader(s string) *Reader { return &Reader{s, 0, -1} }
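Aside (not part of the deleted file): the Reader above carries only the string, a byte offset, and the start index of the last rune read, which is what makes UnreadRune and WriteTo cheap. A minimal standalone sketch of that behaviour from the caller's side; the sample string is arbitrary, while the byte counts and the error text follow from the code above:

    package main

    import (
        "bytes"
        "fmt"
        "io"
        "strings"
    )

    func main() {
        r := strings.NewReader("héllo")

        // ReadRune remembers where the rune started, so UnreadRune can step back.
        ch, size, _ := r.ReadRune()
        fmt.Printf("%c %d\n", ch, size) // h 1
        ch, size, _ = r.ReadRune()
        fmt.Printf("%c %d\n", ch, size) // é 2 (two UTF-8 bytes)

        // Seek clears that bookkeeping, so UnreadRune now reports an error.
        r.Seek(0, 0) // whence 0 seeks relative to the start of the string
        if err := r.UnreadRune(); err != nil {
            fmt.Println(err) // strings.Reader.UnreadRune: previous operation was not ReadRune
        }

        // io.Copy picks up the WriterTo implementation and drains the rest in one call.
        var buf bytes.Buffer
        n, _ := io.Copy(&buf, r)
        fmt.Println(n, buf.String()) // 6 héllo
    }
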
diff --git a/src/pkg/strings/reader_test.go b/src/pkg/strings/reader_test.go
deleted file mode 100644
index bee90eb25..000000000
--- a/src/pkg/strings/reader_test.go
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings_test
-
-import (
- "bytes"
- "fmt"
- "io"
- "os"
- "strings"
- "sync"
- "testing"
-)
-
-func TestReader(t *testing.T) {
- r := strings.NewReader("0123456789")
- tests := []struct {
- off int64
- seek int
- n int
- want string
- wantpos int64
- seekerr string
- }{
- {seek: os.SEEK_SET, off: 0, n: 20, want: "0123456789"},
- {seek: os.SEEK_SET, off: 1, n: 1, want: "1"},
- {seek: os.SEEK_CUR, off: 1, wantpos: 3, n: 2, want: "34"},
- {seek: os.SEEK_SET, off: -1, seekerr: "strings.Reader.Seek: negative position"},
- {seek: os.SEEK_SET, off: 1 << 33, wantpos: 1 << 33},
- {seek: os.SEEK_CUR, off: 1, wantpos: 1<<33 + 1},
- {seek: os.SEEK_SET, n: 5, want: "01234"},
- {seek: os.SEEK_CUR, n: 5, want: "56789"},
- {seek: os.SEEK_END, off: -1, n: 1, wantpos: 9, want: "9"},
- }
-
- for i, tt := range tests {
- pos, err := r.Seek(tt.off, tt.seek)
- if err == nil && tt.seekerr != "" {
- t.Errorf("%d. want seek error %q", i, tt.seekerr)
- continue
- }
- if err != nil && err.Error() != tt.seekerr {
- t.Errorf("%d. seek error = %q; want %q", i, err.Error(), tt.seekerr)
- continue
- }
- if tt.wantpos != 0 && tt.wantpos != pos {
- t.Errorf("%d. pos = %d, want %d", i, pos, tt.wantpos)
- }
- buf := make([]byte, tt.n)
- n, err := r.Read(buf)
- if err != nil {
- t.Errorf("%d. read = %v", i, err)
- continue
- }
- got := string(buf[:n])
- if got != tt.want {
- t.Errorf("%d. got %q; want %q", i, got, tt.want)
- }
- }
-}
-
-func TestReadAfterBigSeek(t *testing.T) {
- r := strings.NewReader("0123456789")
- if _, err := r.Seek(1<<31+5, os.SEEK_SET); err != nil {
- t.Fatal(err)
- }
- if n, err := r.Read(make([]byte, 10)); n != 0 || err != io.EOF {
- t.Errorf("Read = %d, %v; want 0, EOF", n, err)
- }
-}
-
-func TestReaderAt(t *testing.T) {
- r := strings.NewReader("0123456789")
- tests := []struct {
- off int64
- n int
- want string
- wanterr interface{}
- }{
- {0, 10, "0123456789", nil},
- {1, 10, "123456789", io.EOF},
- {1, 9, "123456789", nil},
- {11, 10, "", io.EOF},
- {0, 0, "", nil},
- {-1, 0, "", "strings.Reader.ReadAt: negative offset"},
- }
- for i, tt := range tests {
- b := make([]byte, tt.n)
- rn, err := r.ReadAt(b, tt.off)
- got := string(b[:rn])
- if got != tt.want {
- t.Errorf("%d. got %q; want %q", i, got, tt.want)
- }
- if fmt.Sprintf("%v", err) != fmt.Sprintf("%v", tt.wanterr) {
- t.Errorf("%d. got error = %v; want %v", i, err, tt.wanterr)
- }
- }
-}
-
-func TestReaderAtConcurrent(t *testing.T) {
- // Test for the race detector, to verify ReadAt doesn't mutate
- // any state.
- r := strings.NewReader("0123456789")
- var wg sync.WaitGroup
- for i := 0; i < 5; i++ {
- wg.Add(1)
- go func(i int) {
- defer wg.Done()
- var buf [1]byte
- r.ReadAt(buf[:], int64(i))
- }(i)
- }
- wg.Wait()
-}
-
-func TestEmptyReaderConcurrent(t *testing.T) {
- // Test for the race detector, to verify a Read that doesn't yield any bytes
- // is okay to use from multiple goroutines. This was our historic behavior.
- // See golang.org/issue/7856
- r := strings.NewReader("")
- var wg sync.WaitGroup
- for i := 0; i < 5; i++ {
- wg.Add(2)
- go func() {
- defer wg.Done()
- var buf [1]byte
- r.Read(buf[:])
- }()
- go func() {
- defer wg.Done()
- r.Read(nil)
- }()
- }
- wg.Wait()
-}
-
-func TestWriteTo(t *testing.T) {
- const str = "0123456789"
- for i := 0; i <= len(str); i++ {
- s := str[i:]
- r := strings.NewReader(s)
- var b bytes.Buffer
- n, err := r.WriteTo(&b)
- if expect := int64(len(s)); n != expect {
- t.Errorf("got %v; want %v", n, expect)
- }
- if err != nil {
- t.Errorf("for length %d: got error = %v; want nil", len(s), err)
- }
- if b.String() != s {
- t.Errorf("got string %q; want %q", b.String(), s)
- }
- if r.Len() != 0 {
- t.Errorf("reader contains %v bytes; want 0", r.Len())
- }
- }
-}
diff --git a/src/pkg/strings/replace.go b/src/pkg/strings/replace.go
deleted file mode 100644
index 3e05d2057..000000000
--- a/src/pkg/strings/replace.go
+++ /dev/null
@@ -1,549 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-import "io"
-
-// A Replacer replaces a list of strings with replacements.
-type Replacer struct {
- r replacer
-}
-
-// replacer is the interface that a replacement algorithm needs to implement.
-type replacer interface {
- Replace(s string) string
- WriteString(w io.Writer, s string) (n int, err error)
-}
-
-// byteBitmap represents bytes which are sought for replacement.
-// byteBitmap is 256 bits wide, with a bit set for each old byte to be
-// replaced.
-type byteBitmap [256 / 32]uint32
-
-func (m *byteBitmap) set(b byte) {
- m[b>>5] |= uint32(1 << (b & 31))
-}
-
-// NewReplacer returns a new Replacer from a list of old, new string pairs.
-// Replacements are performed in order, without overlapping matches.
-func NewReplacer(oldnew ...string) *Replacer {
- if len(oldnew)%2 == 1 {
- panic("strings.NewReplacer: odd argument count")
- }
-
- if len(oldnew) == 2 && len(oldnew[0]) > 1 {
- return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
- }
-
- allNewBytes := true
- for i := 0; i < len(oldnew); i += 2 {
- if len(oldnew[i]) != 1 {
- return &Replacer{r: makeGenericReplacer(oldnew)}
- }
- if len(oldnew[i+1]) != 1 {
- allNewBytes = false
- }
- }
-
- if allNewBytes {
- bb := &byteReplacer{}
- for i := 0; i < len(oldnew); i += 2 {
- o, n := oldnew[i][0], oldnew[i+1][0]
- if bb.old[o>>5]&uint32(1<<(o&31)) != 0 {
- // Later old->new maps do not override previous ones with the same old string.
- continue
- }
- bb.old.set(o)
- bb.new[o] = n
- }
- return &Replacer{r: bb}
- }
-
- bs := &byteStringReplacer{}
- for i := 0; i < len(oldnew); i += 2 {
- o, new := oldnew[i][0], oldnew[i+1]
- if bs.old[o>>5]&uint32(1<<(o&31)) != 0 {
- // Later old->new maps do not override previous ones with the same old string.
- continue
- }
- bs.old.set(o)
- bs.new[o] = []byte(new)
- }
- return &Replacer{r: bs}
-}
-
-// Replace returns a copy of s with all replacements performed.
-func (r *Replacer) Replace(s string) string {
- return r.r.Replace(s)
-}
-
-// WriteString writes s to w with all replacements performed.
-func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
- return r.r.WriteString(w, s)
-}
-
-// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
-// and values may be empty. For example, the trie containing keys "ax", "ay",
-// "bcbc", "x" and "xy" could have eight nodes:
-//
-// n0 -
-// n1 a-
-// n2 .x+
-// n3 .y+
-// n4 b-
-// n5 .cbc+
-// n6 x+
-// n7 .y+
-//
-// n0 is the root node, and its children are n1, n4 and n6; n1's children are
-// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
-// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
-// (marked with a trailing "+") are complete keys.
-type trieNode struct {
- // value is the value of the trie node's key/value pair. It is empty if
- // this node is not a complete key.
- value string
- // priority is the priority (higher is more important) of the trie node's
- // key/value pair; keys are not necessarily matched shortest- or longest-
- // first. Priority is positive if this node is a complete key, and zero
- // otherwise. In the example above, positive/zero priorities are marked
- // with a trailing "+" or "-".
- priority int
-
- // A trie node may have zero, one or more child nodes:
- // * if the remaining fields are zero, there are no children.
- // * if prefix and next are non-zero, there is one child in next.
- // * if table is non-zero, it defines all the children.
- //
- // Prefixes are preferred over tables when there is one child, but the
- // root node always uses a table for lookup efficiency.
-
- // prefix is the difference in keys between this trie node and the next.
- // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
- // Node n5 has no children and so has zero prefix, next and table fields.
- prefix string
- next *trieNode
-
- // table is a lookup table indexed by the next byte in the key, after
- // remapping that byte through genericReplacer.mapping to create a dense
- // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
- // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
- // genericReplacer.tableSize will be 5. Node n0's table will be
- // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
- // 'a', 'b' and 'x'.
- table []*trieNode
-}
-
-func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
- if key == "" {
- if t.priority == 0 {
- t.value = val
- t.priority = priority
- }
- return
- }
-
- if t.prefix != "" {
- // Need to split the prefix among multiple nodes.
- var n int // length of the longest common prefix
- for ; n < len(t.prefix) && n < len(key); n++ {
- if t.prefix[n] != key[n] {
- break
- }
- }
- if n == len(t.prefix) {
- t.next.add(key[n:], val, priority, r)
- } else if n == 0 {
- // First byte differs, start a new lookup table here. Looking up
- // what is currently t.prefix[0] will lead to prefixNode, and
- // looking up key[0] will lead to keyNode.
- var prefixNode *trieNode
- if len(t.prefix) == 1 {
- prefixNode = t.next
- } else {
- prefixNode = &trieNode{
- prefix: t.prefix[1:],
- next: t.next,
- }
- }
- keyNode := new(trieNode)
- t.table = make([]*trieNode, r.tableSize)
- t.table[r.mapping[t.prefix[0]]] = prefixNode
- t.table[r.mapping[key[0]]] = keyNode
- t.prefix = ""
- t.next = nil
- keyNode.add(key[1:], val, priority, r)
- } else {
- // Insert new node after the common section of the prefix.
- next := &trieNode{
- prefix: t.prefix[n:],
- next: t.next,
- }
- t.prefix = t.prefix[:n]
- t.next = next
- next.add(key[n:], val, priority, r)
- }
- } else if t.table != nil {
- // Insert into existing table.
- m := r.mapping[key[0]]
- if t.table[m] == nil {
- t.table[m] = new(trieNode)
- }
- t.table[m].add(key[1:], val, priority, r)
- } else {
- t.prefix = key
- t.next = new(trieNode)
- t.next.add("", val, priority, r)
- }
-}
-
-func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
- // Iterate down the trie to the end, and grab the value and keylen with
- // the highest priority.
- bestPriority := 0
- node := &r.root
- n := 0
- for node != nil {
- if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
- bestPriority = node.priority
- val = node.value
- keylen = n
- found = true
- }
-
- if s == "" {
- break
- }
- if node.table != nil {
- index := r.mapping[s[0]]
- if int(index) == r.tableSize {
- break
- }
- node = node.table[index]
- s = s[1:]
- n++
- } else if node.prefix != "" && HasPrefix(s, node.prefix) {
- n += len(node.prefix)
- s = s[len(node.prefix):]
- node = node.next
- } else {
- break
- }
- }
- return
-}
-
-// genericReplacer is the fully generic algorithm.
-// It's used as a fallback when nothing faster can be used.
-type genericReplacer struct {
- root trieNode
- // tableSize is the size of a trie node's lookup table. It is the number
- // of unique key bytes.
- tableSize int
- // mapping maps from key bytes to a dense index for trieNode.table.
- mapping [256]byte
-}
-
-func makeGenericReplacer(oldnew []string) *genericReplacer {
- r := new(genericReplacer)
- // Find each byte used, then assign them each an index.
- for i := 0; i < len(oldnew); i += 2 {
- key := oldnew[i]
- for j := 0; j < len(key); j++ {
- r.mapping[key[j]] = 1
- }
- }
-
- for _, b := range r.mapping {
- r.tableSize += int(b)
- }
-
- var index byte
- for i, b := range r.mapping {
- if b == 0 {
- r.mapping[i] = byte(r.tableSize)
- } else {
- r.mapping[i] = index
- index++
- }
- }
- // Ensure root node uses a lookup table (for performance).
- r.root.table = make([]*trieNode, r.tableSize)
-
- for i := 0; i < len(oldnew); i += 2 {
- r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
- }
- return r
-}
-
-type appendSliceWriter []byte
-
-// Write writes to the buffer to satisfy io.Writer.
-func (w *appendSliceWriter) Write(p []byte) (int, error) {
- *w = append(*w, p...)
- return len(p), nil
-}
-
-// WriteString writes to the buffer without string->[]byte->string allocations.
-func (w *appendSliceWriter) WriteString(s string) (int, error) {
- *w = append(*w, s...)
- return len(s), nil
-}
-
-type stringWriterIface interface {
- WriteString(string) (int, error)
-}
-
-type stringWriter struct {
- w io.Writer
-}
-
-func (w stringWriter) WriteString(s string) (int, error) {
- return w.w.Write([]byte(s))
-}
-
-func getStringWriter(w io.Writer) stringWriterIface {
- sw, ok := w.(stringWriterIface)
- if !ok {
- sw = stringWriter{w}
- }
- return sw
-}
-
-func (r *genericReplacer) Replace(s string) string {
- buf := make(appendSliceWriter, 0, len(s))
- r.WriteString(&buf, s)
- return string(buf)
-}
-
-func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
- sw := getStringWriter(w)
- var last, wn int
- var prevMatchEmpty bool
- for i := 0; i <= len(s); {
- // Ignore the empty match iff the previous loop found the empty match.
- val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
- prevMatchEmpty = match && keylen == 0
- if match {
- wn, err = sw.WriteString(s[last:i])
- n += wn
- if err != nil {
- return
- }
- wn, err = sw.WriteString(val)
- n += wn
- if err != nil {
- return
- }
- i += keylen
- last = i
- continue
- }
- i++
- }
- if last != len(s) {
- wn, err = sw.WriteString(s[last:])
- n += wn
- }
- return
-}
-
-// singleStringReplacer is the implementation that's used when there is only
-// one string to replace (and that string has more than one byte).
-type singleStringReplacer struct {
- finder *stringFinder
- // value is the new string that replaces that pattern when it's found.
- value string
-}
-
-func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
- return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
-}
-
-func (r *singleStringReplacer) Replace(s string) string {
- var buf []byte
- i, matched := 0, false
- for {
- match := r.finder.next(s[i:])
- if match == -1 {
- break
- }
- matched = true
- buf = append(buf, s[i:i+match]...)
- buf = append(buf, r.value...)
- i += match + len(r.finder.pattern)
- }
- if !matched {
- return s
- }
- buf = append(buf, s[i:]...)
- return string(buf)
-}
-
-func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
- sw := getStringWriter(w)
- var i, wn int
- for {
- match := r.finder.next(s[i:])
- if match == -1 {
- break
- }
- wn, err = sw.WriteString(s[i : i+match])
- n += wn
- if err != nil {
- return
- }
- wn, err = sw.WriteString(r.value)
- n += wn
- if err != nil {
- return
- }
- i += match + len(r.finder.pattern)
- }
- wn, err = sw.WriteString(s[i:])
- n += wn
- return
-}
-
-// byteReplacer is the implementation that's used when all the "old"
-// and "new" values are single ASCII bytes.
-type byteReplacer struct {
- // old has a bit set for each old byte that should be replaced.
- old byteBitmap
-
- // replacement byte, indexed by old byte. only valid if
- // corresponding old bit is set.
- new [256]byte
-}
-
-func (r *byteReplacer) Replace(s string) string {
- var buf []byte // lazily allocated
- for i := 0; i < len(s); i++ {
- b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- if buf == nil {
- buf = []byte(s)
- }
- buf[i] = r.new[b]
- }
- }
- if buf == nil {
- return s
- }
- return string(buf)
-}
-
-func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
- // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
- bufsize := 32 << 10
- if len(s) < bufsize {
- bufsize = len(s)
- }
- buf := make([]byte, bufsize)
-
- for len(s) > 0 {
- ncopy := copy(buf, s[:])
- s = s[ncopy:]
- for i, b := range buf[:ncopy] {
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- buf[i] = r.new[b]
- }
- }
- wn, err := w.Write(buf[:ncopy])
- n += wn
- if err != nil {
- return n, err
- }
- }
- return n, nil
-}
-
-// byteStringReplacer is the implementation that's used when all the
-// "old" values are single ASCII bytes but the "new" values vary in
-// size.
-type byteStringReplacer struct {
- // old has a bit set for each old byte that should be replaced.
- old byteBitmap
-
- // replacement string, indexed by old byte. only valid if
- // corresponding old bit is set.
- new [256][]byte
-}
-
-func (r *byteStringReplacer) Replace(s string) string {
- newSize := 0
- anyChanges := false
- for i := 0; i < len(s); i++ {
- b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- anyChanges = true
- newSize += len(r.new[b])
- } else {
- newSize++
- }
- }
- if !anyChanges {
- return s
- }
- buf := make([]byte, newSize)
- bi := buf
- for i := 0; i < len(s); i++ {
- b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- n := copy(bi, r.new[b])
- bi = bi[n:]
- } else {
- bi[0] = b
- bi = bi[1:]
- }
- }
- return string(buf)
-}
-
-// WriteString maintains one buffer that's at most 32KB. The bytes in
-// s are enumerated and the buffer is filled. If it reaches its
-// capacity or a byte has a replacement, the buffer is flushed to w.
-func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
- // TODO(bradfitz): use io.WriteString with slices of s instead.
- bufsize := 32 << 10
- if len(s) < bufsize {
- bufsize = len(s)
- }
- buf := make([]byte, bufsize)
- bi := buf[:0]
-
- for i := 0; i < len(s); i++ {
- b := s[i]
- var new []byte
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- new = r.new[b]
- } else {
- bi = append(bi, b)
- }
- if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
- nw, err := w.Write(bi)
- n += nw
- if err != nil {
- return n, err
- }
- bi = buf[:0]
- }
- if len(new) > 0 {
- nw, err := w.Write(new)
- n += nw
- if err != nil {
- return n, err
- }
- }
- }
- if len(bi) > 0 {
- nw, err := w.Write(bi)
- n += nw
- if err != nil {
- return n, err
- }
- }
- return n, nil
-}
diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go
deleted file mode 100644
index 82e4b6ef0..000000000
--- a/src/pkg/strings/replace_test.go
+++ /dev/null
@@ -1,506 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings_test
-
-import (
- "bytes"
- "fmt"
- . "strings"
- "testing"
-)
-
-var htmlEscaper = NewReplacer(
- "&", "&amp;",
- "<", "&lt;",
- ">", "&gt;",
- `"`, "&quot;",
- "'", "&apos;",
-)
-
-var htmlUnescaper = NewReplacer(
- "&amp;", "&",
- "&lt;", "<",
- "&gt;", ">",
- "&quot;", `"`,
- "&apos;", "'",
-)
-
-// The http package's old HTML escaping function.
-func oldHTMLEscape(s string) string {
- s = Replace(s, "&", "&amp;", -1)
- s = Replace(s, "<", "&lt;", -1)
- s = Replace(s, ">", "&gt;", -1)
- s = Replace(s, `"`, "&quot;", -1)
- s = Replace(s, "'", "&apos;", -1)
- return s
-}
-
-var capitalLetters = NewReplacer("a", "A", "b", "B")
-
-// TestReplacer tests the replacer implementations.
-func TestReplacer(t *testing.T) {
- type testCase struct {
- r *Replacer
- in, out string
- }
- var testCases []testCase
-
- // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
- str := func(b byte) string {
- return string([]byte{b})
- }
- var s []string
-
- // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
- s = nil
- for i := 0; i < 256; i++ {
- s = append(s, str(byte(i)), str(byte(i+1)))
- }
- inc := NewReplacer(s...)
-
- // Test cases with 1-byte old strings, 1-byte new strings.
- testCases = append(testCases,
- testCase{capitalLetters, "brad", "BrAd"},
- testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
- testCase{capitalLetters, "", ""},
-
- testCase{inc, "brad", "csbe"},
- testCase{inc, "\x00\xff", "\x01\x00"},
- testCase{inc, "", ""},
-
- testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
- )
-
- // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
- s = nil
- for i := 0; i < 256; i++ {
- n := i + 1 - 'a'
- if n < 1 {
- n = 1
- }
- s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
- }
- repeat := NewReplacer(s...)
-
- // Test cases with 1-byte old strings, variable length new strings.
- testCases = append(testCases,
- testCase{htmlEscaper, "No changes", "No changes"},
- testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
- testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
- testCase{htmlEscaper, "", ""},
-
- testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
- testCase{repeat, "abba", "abbbba"},
- testCase{repeat, "", ""},
-
- testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
- )
-
- // The remaining test cases have variable length old strings.
-
- testCases = append(testCases,
- testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
- testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
- testCase{htmlUnescaper, "", ""},
-
- testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
-
- testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
-
- testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
- )
-
- // gen1 has multiple old strings of variable length. There is no
- // overall non-empty common prefix, but some pairwise common prefixes.
- gen1 := NewReplacer(
- "aaa", "3[aaa]",
- "aa", "2[aa]",
- "a", "1[a]",
- "i", "i",
- "longerst", "most long",
- "longer", "medium",
- "long", "short",
- "xx", "xx",
- "x", "X",
- "X", "Y",
- "Y", "Z",
- )
- testCases = append(testCases,
- testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
- testCase{gen1, "long, longerst, longer", "short, most long, medium"},
- testCase{gen1, "xxxxx", "xxxxX"},
- testCase{gen1, "XiX", "YiY"},
- testCase{gen1, "", ""},
- )
-
- // gen2 has multiple old strings with no pairwise common prefix.
- gen2 := NewReplacer(
- "roses", "red",
- "violets", "blue",
- "sugar", "sweet",
- )
- testCases = append(testCases,
- testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
- testCase{gen2, "", ""},
- )
-
- // gen3 has multiple old strings with an overall common prefix.
- gen3 := NewReplacer(
- "abracadabra", "poof",
- "abracadabrakazam", "splat",
- "abraham", "lincoln",
- "abrasion", "scrape",
- "abraham", "isaac",
- )
- testCases = append(testCases,
- testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
- testCase{gen3, "abrasion abracad", "scrape abracad"},
- testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
- testCase{gen3, "", ""},
- )
-
- // foo{1,2,3,4} have multiple old strings with an overall common prefix
- // and 1- or 2- byte extensions from the common prefix.
- foo1 := NewReplacer(
- "foo1", "A",
- "foo2", "B",
- "foo3", "C",
- )
- foo2 := NewReplacer(
- "foo1", "A",
- "foo2", "B",
- "foo31", "C",
- "foo32", "D",
- )
- foo3 := NewReplacer(
- "foo11", "A",
- "foo12", "B",
- "foo31", "C",
- "foo32", "D",
- )
- foo4 := NewReplacer(
- "foo12", "B",
- "foo32", "D",
- )
- testCases = append(testCases,
- testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
- testCase{foo1, "", ""},
-
- testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
- testCase{foo2, "", ""},
-
- testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
- testCase{foo3, "", ""},
-
- testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
- testCase{foo4, "", ""},
- )
-
- // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
- allBytes := make([]byte, 256)
- for i := range allBytes {
- allBytes[i] = byte(i)
- }
- allString := string(allBytes)
- genAll := NewReplacer(
- allString, "[all]",
- "\xff", "[ff]",
- "\x00", "[00]",
- )
- testCases = append(testCases,
- testCase{genAll, allString, "[all]"},
- testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
- testCase{genAll, "", ""},
- )
-
- // Test cases with empty old strings.
-
- blankToX1 := NewReplacer("", "X")
- blankToX2 := NewReplacer("", "X", "", "")
- blankHighPriority := NewReplacer("", "X", "o", "O")
- blankLowPriority := NewReplacer("o", "O", "", "X")
- blankNoOp1 := NewReplacer("", "")
- blankNoOp2 := NewReplacer("", "", "", "A")
- blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
- testCases = append(testCases,
- testCase{blankToX1, "foo", "XfXoXoX"},
- testCase{blankToX1, "", "X"},
-
- testCase{blankToX2, "foo", "XfXoXoX"},
- testCase{blankToX2, "", "X"},
-
- testCase{blankHighPriority, "oo", "XOXOX"},
- testCase{blankHighPriority, "ii", "XiXiX"},
- testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
- testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
- testCase{blankHighPriority, "", "X"},
-
- testCase{blankLowPriority, "oo", "OOX"},
- testCase{blankLowPriority, "ii", "XiXiX"},
- testCase{blankLowPriority, "oiio", "OXiXiOX"},
- testCase{blankLowPriority, "iooi", "XiOOXiX"},
- testCase{blankLowPriority, "", "X"},
-
- testCase{blankNoOp1, "foo", "foo"},
- testCase{blankNoOp1, "", ""},
-
- testCase{blankNoOp2, "foo", "foo"},
- testCase{blankNoOp2, "", ""},
-
- testCase{blankFoo, "foobarfoobaz", "XRXZX"},
- testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
- testCase{blankFoo, "", "X"},
- )
-
- // single string replacer
-
- abcMatcher := NewReplacer("abc", "[match]")
-
- testCases = append(testCases,
- testCase{abcMatcher, "", ""},
- testCase{abcMatcher, "ab", "ab"},
- testCase{abcMatcher, "abc", "[match]"},
- testCase{abcMatcher, "abcd", "[match]d"},
- testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
- )
-
- // Issue 6659 cases (more single string replacer)
-
- noHello := NewReplacer("Hello", "")
- testCases = append(testCases,
- testCase{noHello, "Hello", ""},
- testCase{noHello, "Hellox", "x"},
- testCase{noHello, "xHello", "x"},
- testCase{noHello, "xHellox", "xx"},
- )
-
- // No-arg test cases.
-
- nop := NewReplacer()
- testCases = append(testCases,
- testCase{nop, "abc", "abc"},
- testCase{nop, "", ""},
- )
-
- // Run the test cases.
-
- for i, tc := range testCases {
- if s := tc.r.Replace(tc.in); s != tc.out {
- t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
- }
- var buf bytes.Buffer
- n, err := tc.r.WriteString(&buf, tc.in)
- if err != nil {
- t.Errorf("%d. WriteString: %v", i, err)
- continue
- }
- got := buf.String()
- if got != tc.out {
- t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
- continue
- }
- if n != len(tc.out) {
- t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
- i, tc.in, n, len(tc.out), tc.out)
- }
- }
-}
-
-// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
-func TestPickAlgorithm(t *testing.T) {
- testCases := []struct {
- r *Replacer
- want string
- }{
- {capitalLetters, "*strings.byteReplacer"},
- {htmlEscaper, "*strings.byteStringReplacer"},
- {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
- {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
- {NewReplacer("", "X"), "*strings.genericReplacer"},
- {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
- }
- for i, tc := range testCases {
- got := fmt.Sprintf("%T", tc.r.Replacer())
- if got != tc.want {
- t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
- }
- }
-}
-
-// TestGenericTrieBuilding verifies the structure of the generated trie. There
-// is one node per line, and the key ending with the current line is in the
-// trie if it ends with a "+".
-func TestGenericTrieBuilding(t *testing.T) {
- testCases := []struct{ in, out string }{
- {"abc;abdef;abdefgh;xx;xy;z", `-
- a-
- .b-
- ..c+
- ..d-
- ...ef+
- .....gh+
- x-
- .x+
- .y+
- z+
- `},
- {"abracadabra;abracadabrakazam;abraham;abrasion", `-
- a-
- .bra-
- ....c-
- .....adabra+
- ...........kazam+
- ....h-
- .....am+
- ....s-
- .....ion+
- `},
- {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
- X+
- Y+
- a+
- .a+
- ..a+
- i+
- l-
- .ong+
- ....er+
- ......st+
- x+
- .x+
- `},
- {"foo;;foo;foo1", `+
- f-
- .oo+
- ...1+
- `},
- }
-
- for _, tc := range testCases {
- keys := Split(tc.in, ";")
- args := make([]string, len(keys)*2)
- for i, key := range keys {
- args[i*2] = key
- }
-
- got := NewReplacer(args...).PrintTrie()
- // Remove tabs from tc.out
- wantbuf := make([]byte, 0, len(tc.out))
- for i := 0; i < len(tc.out); i++ {
- if tc.out[i] != '\t' {
- wantbuf = append(wantbuf, tc.out[i])
- }
- }
- want := string(wantbuf)
-
- if got != want {
- t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
- }
- }
-}
-
-func BenchmarkGenericNoMatch(b *testing.B) {
- str := Repeat("A", 100) + Repeat("B", 100)
- generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
- for i := 0; i < b.N; i++ {
- generic.Replace(str)
- }
-}
-
-func BenchmarkGenericMatch1(b *testing.B) {
- str := Repeat("a", 100) + Repeat("b", 100)
- generic := NewReplacer("a", "A", "b", "B", "12", "123")
- for i := 0; i < b.N; i++ {
- generic.Replace(str)
- }
-}
-
-func BenchmarkGenericMatch2(b *testing.B) {
- str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
- for i := 0; i < b.N; i++ {
- htmlUnescaper.Replace(str)
- }
-}
-
-func benchmarkSingleString(b *testing.B, pattern, text string) {
- r := NewReplacer(pattern, "[match]")
- b.SetBytes(int64(len(text)))
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- r.Replace(text)
- }
-}
-
-func BenchmarkSingleMaxSkipping(b *testing.B) {
- benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
-}
-
-func BenchmarkSingleLongSuffixFail(b *testing.B) {
- benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
-}
-
-func BenchmarkSingleMatch(b *testing.B) {
- benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
-}
-
-func BenchmarkByteByteNoMatch(b *testing.B) {
- str := Repeat("A", 100) + Repeat("B", 100)
- for i := 0; i < b.N; i++ {
- capitalLetters.Replace(str)
- }
-}
-
-func BenchmarkByteByteMatch(b *testing.B) {
- str := Repeat("a", 100) + Repeat("b", 100)
- for i := 0; i < b.N; i++ {
- capitalLetters.Replace(str)
- }
-}
-
-func BenchmarkByteStringMatch(b *testing.B) {
- str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
- for i := 0; i < b.N; i++ {
- htmlEscaper.Replace(str)
- }
-}
-
-func BenchmarkHTMLEscapeNew(b *testing.B) {
- str := "I <3 to escape HTML & other text too."
- for i := 0; i < b.N; i++ {
- htmlEscaper.Replace(str)
- }
-}
-
-func BenchmarkHTMLEscapeOld(b *testing.B) {
- str := "I <3 to escape HTML & other text too."
- for i := 0; i < b.N; i++ {
- oldHTMLEscape(str)
- }
-}
-
-// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
-func BenchmarkByteByteReplaces(b *testing.B) {
- str := Repeat("a", 100) + Repeat("b", 100)
- for i := 0; i < b.N; i++ {
- Replace(Replace(str, "a", "A", -1), "b", "B", -1)
- }
-}
-
-// BenchmarkByteByteMap compares byteByteImpl against Map.
-func BenchmarkByteByteMap(b *testing.B) {
- str := Repeat("a", 100) + Repeat("b", 100)
- fn := func(r rune) rune {
- switch r {
- case 'a':
- return 'A'
- case 'b':
- return 'B'
- }
- return r
- }
- for i := 0; i < b.N; i++ {
- Map(fn, str)
- }
-}
diff --git a/src/pkg/strings/search.go b/src/pkg/strings/search.go
deleted file mode 100644
index f77c879c5..000000000
--- a/src/pkg/strings/search.go
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-// stringFinder efficiently finds strings in a source text. It's implemented
-// using the Boyer-Moore string search algorithm:
-// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
-// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
-// document uses 1-based indexing)
-type stringFinder struct {
- // pattern is the string that we are searching for in the text.
- pattern string
-
- // badCharSkip[b] contains the distance between the last byte of pattern
- // and the rightmost occurrence of b in pattern. If b is not in pattern,
- // badCharSkip[b] is len(pattern).
- //
- // Whenever a mismatch is found with byte b in the text, we can safely
- // shift the matching frame at least badCharSkip[b] until the next time
- // the matching char could be in alignment.
- badCharSkip [256]int
-
- // goodSuffixSkip[i] defines how far we can shift the matching frame given
- // that the suffix pattern[i+1:] matches, but the byte pattern[i] does
- // not. There are two cases to consider:
- //
- // 1. The matched suffix occurs elsewhere in pattern (with a different
- // byte preceding it that we might possibly match). In this case, we can
- // shift the matching frame to align with the next suffix chunk. For
- // example, the pattern "mississi" has the suffix "issi" next occurring
- // (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
- // shift+len(suffix) == 3+4 == 7.
- //
- // 2. If the matched suffix does not occur elsewhere in pattern, then the
- // matching frame may share part of its prefix with the end of the
- // matching suffix. In this case, goodSuffixSkip[i] will contain how far
- // to shift the frame to align this portion of the prefix to the
- // suffix. For example, in the pattern "abcxxxabc", when the first
- // mismatch from the back is found to be in position 3, the matching
- // suffix "xxabc" is not found elsewhere in the pattern. However, its
- // rightmost "abc" (at position 6) is a prefix of the whole pattern, so
- // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
- goodSuffixSkip []int
-}
-
-func makeStringFinder(pattern string) *stringFinder {
- f := &stringFinder{
- pattern: pattern,
- goodSuffixSkip: make([]int, len(pattern)),
- }
- // last is the index of the last character in the pattern.
- last := len(pattern) - 1
-
- // Build bad character table.
- // Bytes not in the pattern can skip one pattern's length.
- for i := range f.badCharSkip {
- f.badCharSkip[i] = len(pattern)
- }
- // The loop condition is < instead of <= so that the last byte does not
- // have a zero distance to itself. Finding this byte out of place implies
- // that it is not in the last position.
- for i := 0; i < last; i++ {
- f.badCharSkip[pattern[i]] = last - i
- }
-
- // Build good suffix table.
- // First pass: set each value to the next index which starts a prefix of
- // pattern.
- lastPrefix := last
- for i := last; i >= 0; i-- {
- if HasPrefix(pattern, pattern[i+1:]) {
- lastPrefix = i + 1
- }
- // lastPrefix is the shift, and (last-i) is len(suffix).
- f.goodSuffixSkip[i] = lastPrefix + last - i
- }
- // Second pass: find repeats of pattern's suffix starting from the front.
- for i := 0; i < last; i++ {
- lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
- if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
- // (last-i) is the shift, and lenSuffix is len(suffix).
- f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
- }
- }
-
- return f
-}
-
-func longestCommonSuffix(a, b string) (i int) {
- for ; i < len(a) && i < len(b); i++ {
- if a[len(a)-1-i] != b[len(b)-1-i] {
- break
- }
- }
- return
-}
-
-// next returns the index in text of the first occurrence of the pattern. If
-// the pattern is not found, it returns -1.
-func (f *stringFinder) next(text string) int {
- i := len(f.pattern) - 1
- for i < len(text) {
- // Compare backwards from the end until the first unmatching character.
- j := len(f.pattern) - 1
- for j >= 0 && text[i] == f.pattern[j] {
- i--
- j--
- }
- if j < 0 {
- return i + 1 // match
- }
- i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
- }
- return -1
-}
-
-func max(a, b int) int {
- if a > b {
- return a
- }
- return b
-}
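Aside (not part of the deleted file): makeStringFinder above builds the two Boyer-Moore tables in one pass each. A standalone recomputation of just the bad-character table, written to mirror its first loop; the printed values match the "abc" case in search_test.go below:

    package main

    import "fmt"

    // badCharTable mirrors the bad-character loop in makeStringFinder:
    // every byte defaults to len(pattern), and each pattern byte except
    // the last records its distance from the end of the pattern.
    func badCharTable(pattern string) [256]int {
        var skip [256]int
        last := len(pattern) - 1
        for i := range skip {
            skip[i] = len(pattern)
        }
        for i := 0; i < last; i++ {
            skip[pattern[i]] = last - i
        }
        return skip
    }

    func main() {
        skip := badCharTable("abc")
        // 'a' is 2 bytes from the end, 'b' is 1; 'c' (the last byte) and
        // bytes not in the pattern keep the default of len("abc") == 3.
        fmt.Println(skip['a'], skip['b'], skip['c'], skip['z']) // 2 1 3 3
    }
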
diff --git a/src/pkg/strings/search_test.go b/src/pkg/strings/search_test.go
deleted file mode 100644
index 966c05e65..000000000
--- a/src/pkg/strings/search_test.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings_test
-
-import (
- "reflect"
- . "strings"
- "testing"
-)
-
-func TestFinderNext(t *testing.T) {
- testCases := []struct {
- pat, text string
- index int
- }{
- {"", "", 0},
- {"", "abc", 0},
- {"abc", "", -1},
- {"abc", "abc", 0},
- {"d", "abcdefg", 3},
- {"nan", "banana", 2},
- {"pan", "anpanman", 2},
- {"nnaaman", "anpanmanam", -1},
- {"abcd", "abc", -1},
- {"abcd", "bcd", -1},
- {"bcd", "abcd", 1},
- {"abc", "acca", -1},
- {"aa", "aaa", 0},
- {"baa", "aaaaa", -1},
- {"at that", "which finally halts. at that point", 22},
- }
-
- for _, tc := range testCases {
- got := StringFind(tc.pat, tc.text)
- want := tc.index
- if got != want {
- t.Errorf("stringFind(%q, %q) got %d, want %d\n", tc.pat, tc.text, got, want)
- }
- }
-}
-
-func TestFinderCreation(t *testing.T) {
- testCases := []struct {
- pattern string
- bad [256]int
- suf []int
- }{
- {
- "abc",
- [256]int{'a': 2, 'b': 1, 'c': 3},
- []int{5, 4, 1},
- },
- {
- "mississi",
- [256]int{'i': 3, 'm': 7, 's': 1},
- []int{15, 14, 13, 7, 11, 10, 7, 1},
- },
- // From http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
- {
- "abcxxxabc",
- [256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3},
- []int{14, 13, 12, 11, 10, 9, 11, 10, 1},
- },
- {
- "abyxcdeyx",
- [256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5},
- []int{17, 16, 15, 14, 13, 12, 7, 10, 1},
- },
- }
-
- for _, tc := range testCases {
- bad, good := DumpTables(tc.pattern)
-
- for i, got := range bad {
- want := tc.bad[i]
- if want == 0 {
- want = len(tc.pattern)
- }
- if got != want {
- t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", tc.pattern, i, got, want)
- }
- }
-
- if !reflect.DeepEqual(good, tc.suf) {
- t.Errorf("boyerMoore(%q) got %v want %v", tc.pattern, good, tc.suf)
- }
- }
-}
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
deleted file mode 100644
index 5d46211d8..000000000
--- a/src/pkg/strings/strings.go
+++ /dev/null
@@ -1,725 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package strings implements simple functions to manipulate strings.
-package strings
-
-import (
- "unicode"
- "unicode/utf8"
-)
-
-// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit).
-// Invalid UTF-8 sequences become correct encodings of U+FFFD.
-func explode(s string, n int) []string {
- if n == 0 {
- return nil
- }
- l := utf8.RuneCountInString(s)
- if n <= 0 || n > l {
- n = l
- }
- a := make([]string, n)
- var size int
- var ch rune
- i, cur := 0, 0
- for ; i+1 < n; i++ {
- ch, size = utf8.DecodeRuneInString(s[cur:])
- if ch == utf8.RuneError {
- a[i] = string(utf8.RuneError)
- } else {
- a[i] = s[cur : cur+size]
- }
- cur += size
- }
- // add the rest, if there is any
- if cur < len(s) {
- a[i] = s[cur:]
- }
- return a
-}
-
-// primeRK is the prime base used in Rabin-Karp algorithm.
-const primeRK = 16777619
-
-// hashstr returns the hash and the appropriate multiplicative
-// factor for use in Rabin-Karp algorithm.
-func hashstr(sep string) (uint32, uint32) {
- hash := uint32(0)
- for i := 0; i < len(sep); i++ {
- hash = hash*primeRK + uint32(sep[i])
-
- }
- var pow, sq uint32 = 1, primeRK
- for i := len(sep); i > 0; i >>= 1 {
- if i&1 != 0 {
- pow *= sq
- }
- sq *= sq
- }
- return hash, pow
-}
-
-// Count counts the number of non-overlapping instances of sep in s.
-func Count(s, sep string) int {
- n := 0
- // special cases
- switch {
- case len(sep) == 0:
- return utf8.RuneCountInString(s) + 1
- case len(sep) == 1:
- // special case worth making fast
- c := sep[0]
- for i := 0; i < len(s); i++ {
- if s[i] == c {
- n++
- }
- }
- return n
- case len(sep) > len(s):
- return 0
- case len(sep) == len(s):
- if sep == s {
- return 1
- }
- return 0
- }
- hashsep, pow := hashstr(sep)
- h := uint32(0)
- for i := 0; i < len(sep); i++ {
- h = h*primeRK + uint32(s[i])
- }
- lastmatch := 0
- if h == hashsep && s[:len(sep)] == sep {
- n++
- lastmatch = len(sep)
- }
- for i := len(sep); i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-len(sep)])
- i++
- if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
- n++
- lastmatch = i
- }
- }
- return n
-}
-
-// Contains returns true if substr is within s.
-func Contains(s, substr string) bool {
- return Index(s, substr) >= 0
-}
-
-// ContainsAny returns true if any Unicode code points in chars are within s.
-func ContainsAny(s, chars string) bool {
- return IndexAny(s, chars) >= 0
-}
-
-// ContainsRune returns true if the Unicode code point r is within s.
-func ContainsRune(s string, r rune) bool {
- return IndexRune(s, r) >= 0
-}
-
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep string) int {
- n := len(sep)
- switch {
- case n == 0:
- return 0
- case n == 1:
- return IndexByte(s, sep[0])
- case n == len(s):
- if sep == s {
- return 0
- }
- return -1
- case n > len(s):
- return -1
- }
- // Hash sep.
- hashsep, pow := hashstr(sep)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashsep && s[:n] == sep {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashsep && s[i-n:i] == sep {
- return i - n
- }
- }
- return -1
-}
-
-// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
-func LastIndex(s, sep string) int {
- n := len(sep)
- if n == 0 {
- return len(s)
- }
- c := sep[0]
- if n == 1 {
- // special case worth making fast
- for i := len(s) - 1; i >= 0; i-- {
- if s[i] == c {
- return i
- }
- }
- return -1
- }
- // n > 1
- for i := len(s) - n; i >= 0; i-- {
- if s[i] == c && s[i:i+n] == sep {
- return i
- }
- }
- return -1
-}
-
-// IndexRune returns the index of the first instance of the Unicode code point
-// r, or -1 if rune is not present in s.
-func IndexRune(s string, r rune) int {
- switch {
- case r < 0x80:
- b := byte(r)
- for i := 0; i < len(s); i++ {
- if s[i] == b {
- return i
- }
- }
- default:
- for i, c := range s {
- if c == r {
- return i
- }
- }
- }
- return -1
-}
-
-// IndexAny returns the index of the first instance of any Unicode code point
-// from chars in s, or -1 if no Unicode code point from chars is present in s.
-func IndexAny(s, chars string) int {
- if len(chars) > 0 {
- for i, c := range s {
- for _, m := range chars {
- if c == m {
- return i
- }
- }
- }
- }
- return -1
-}
-
-// LastIndexAny returns the index of the last instance of any Unicode code
-// point from chars in s, or -1 if no Unicode code point from chars is
-// present in s.
-func LastIndexAny(s, chars string) int {
- if len(chars) > 0 {
- for i := len(s); i > 0; {
- rune, size := utf8.DecodeLastRuneInString(s[0:i])
- i -= size
- for _, m := range chars {
- if rune == m {
- return i
- }
- }
- }
- }
- return -1
-}
-
-// Generic split: splits after each instance of sep,
-// including sepSave bytes of sep in the subarrays.
-func genSplit(s, sep string, sepSave, n int) []string {
- if n == 0 {
- return nil
- }
- if sep == "" {
- return explode(s, n)
- }
- if n < 0 {
- n = Count(s, sep) + 1
- }
- c := sep[0]
- start := 0
- a := make([]string, n)
- na := 0
- for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
- if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
- a[na] = s[start : i+sepSave]
- na++
- start = i + len(sep)
- i += len(sep) - 1
- }
- }
- a[na] = s[start:]
- return a[0 : na+1]
-}
-
-// SplitN slices s into substrings separated by sep and returns a slice of
-// the substrings between those separators.
-// If sep is empty, SplitN splits after each UTF-8 sequence.
-// The count determines the number of substrings to return:
-// n > 0: at most n substrings; the last substring will be the unsplit remainder.
-// n == 0: the result is nil (zero substrings)
-// n < 0: all substrings
-func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
-
-// SplitAfterN slices s into substrings after each instance of sep and
-// returns a slice of those substrings.
-// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
-// The count determines the number of substrings to return:
-// n > 0: at most n substrings; the last substring will be the unsplit remainder.
-// n == 0: the result is nil (zero substrings)
-// n < 0: all substrings
-func SplitAfterN(s, sep string, n int) []string {
- return genSplit(s, sep, len(sep), n)
-}
-
-// Split slices s into all substrings separated by sep and returns a slice of
-// the substrings between those separators.
-// If sep is empty, Split splits after each UTF-8 sequence.
-// It is equivalent to SplitN with a count of -1.
-func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
-
-// SplitAfter slices s into all substrings after each instance of sep and
-// returns a slice of those substrings.
-// If sep is empty, SplitAfter splits after each UTF-8 sequence.
-// It is equivalent to SplitAfterN with a count of -1.
-func SplitAfter(s, sep string) []string {
- return genSplit(s, sep, len(sep), -1)
-}
-
-// Fields splits the string s around each instance of one or more consecutive white space
-// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
-// empty list if s contains only white space.
-func Fields(s string) []string {
- return FieldsFunc(s, unicode.IsSpace)
-}
-
-// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
-// and returns an array of slices of s. If all code points in s satisfy f(c) or the
-// string is empty, an empty slice is returned.
-func FieldsFunc(s string, f func(rune) bool) []string {
- // First count the fields.
- n := 0
- inField := false
- for _, rune := range s {
- wasInField := inField
- inField = !f(rune)
- if inField && !wasInField {
- n++
- }
- }
-
- // Now create them.
- a := make([]string, n)
- na := 0
- fieldStart := -1 // Set to -1 when looking for start of field.
- for i, rune := range s {
- if f(rune) {
- if fieldStart >= 0 {
- a[na] = s[fieldStart:i]
- na++
- fieldStart = -1
- }
- } else if fieldStart == -1 {
- fieldStart = i
- }
- }
- if fieldStart >= 0 { // Last field might end at EOF.
- a[na] = s[fieldStart:]
- }
- return a
-}
-
-// Join concatenates the elements of a to create a single string. The separator string
-// sep is placed between elements in the resulting string.
-func Join(a []string, sep string) string {
- if len(a) == 0 {
- return ""
- }
- if len(a) == 1 {
- return a[0]
- }
- n := len(sep) * (len(a) - 1)
- for i := 0; i < len(a); i++ {
- n += len(a[i])
- }
-
- b := make([]byte, n)
- bp := copy(b, a[0])
- for _, s := range a[1:] {
- bp += copy(b[bp:], sep)
- bp += copy(b[bp:], s)
- }
- return string(b)
-}
-
-// HasPrefix tests whether the string s begins with prefix.
-func HasPrefix(s, prefix string) bool {
- return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
-}
-
-// HasSuffix tests whether the string s ends with suffix.
-func HasSuffix(s, suffix string) bool {
- return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
-}
-
-// Map returns a copy of the string s with all its characters modified
-// according to the mapping function. If mapping returns a negative value, the character is
-// dropped from the string with no replacement.
-func Map(mapping func(rune) rune, s string) string {
- // In the worst case, the string can grow when mapped, making
- // things unpleasant. But it's so rare we barge in assuming it's
- // fine. It could also shrink but that falls out naturally.
- maxbytes := len(s) // length of b
- nbytes := 0 // number of bytes encoded in b
- // The output buffer b is initialized on demand, the first
- // time a character differs.
- var b []byte
-
- for i, c := range s {
- r := mapping(c)
- if b == nil {
- if r == c {
- continue
- }
- b = make([]byte, maxbytes)
- nbytes = copy(b, s[:i])
- }
- if r >= 0 {
- wid := 1
- if r >= utf8.RuneSelf {
- wid = utf8.RuneLen(r)
- }
- if nbytes+wid > maxbytes {
- // Grow the buffer.
- maxbytes = maxbytes*2 + utf8.UTFMax
- nb := make([]byte, maxbytes)
- copy(nb, b[0:nbytes])
- b = nb
- }
- nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
- }
- }
- if b == nil {
- return s
- }
- return string(b[0:nbytes])
-}
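
A minimal sketch of the two behaviors of Map described above: rewriting each rune, and dropping runes for which the mapping returns a negative value (illustrative mapping and input):

package main

import (
	"fmt"
	"strings"
	"unicode"
)

func main() {
	// Uppercase letters, drop every other rune.
	keepUpper := func(r rune) rune {
		if unicode.IsLetter(r) {
			return unicode.ToUpper(r)
		}
		return -1 // dropped
	}
	fmt.Println(strings.Map(keepUpper, "go 1.x!")) // GOX
}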
-
-// Repeat returns a new string consisting of count copies of the string s.
-func Repeat(s string, count int) string {
- b := make([]byte, len(s)*count)
- bp := 0
- for i := 0; i < count; i++ {
- bp += copy(b[bp:], s)
- }
- return string(b)
-}
-
-// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
-func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
-
-// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
-func ToLower(s string) string { return Map(unicode.ToLower, s) }
-
-// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
-func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
-
-// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
-// upper case, giving priority to the special casing rules.
-func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
-}
-
-// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
-// lower case, giving priority to the special casing rules.
-func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToLower(r) }, s)
-}
-
-// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
-// title case, giving priority to the special casing rules.
-func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
-}
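
A minimal sketch of the special-case variants: under unicode.TurkishCase the dotted/dotless i rules take priority over the default mappings (illustrative inputs):

package main

import (
	"fmt"
	"strings"
	"unicode"
)

func main() {
	fmt.Println(strings.ToUpper("i"))                             // I
	fmt.Println(strings.ToUpperSpecial(unicode.TurkishCase, "i")) // İ
	fmt.Println(strings.ToLowerSpecial(unicode.TurkishCase, "I")) // ı
}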
-
-// isSeparator reports whether the rune could mark a word boundary.
-// TODO: update when package unicode captures more of the properties.
-func isSeparator(r rune) bool {
- // ASCII alphanumerics and underscore are not separators
- if r <= 0x7F {
- switch {
- case '0' <= r && r <= '9':
- return false
- case 'a' <= r && r <= 'z':
- return false
- case 'A' <= r && r <= 'Z':
- return false
- case r == '_':
- return false
- }
- return true
- }
- // Letters and digits are not separators
- if unicode.IsLetter(r) || unicode.IsDigit(r) {
- return false
- }
- // Otherwise, all we can do for now is treat spaces as separators.
- return unicode.IsSpace(r)
-}
-
-// Title returns a copy of the string s with all Unicode letters that begin words
-// mapped to their title case.
-//
-// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
-func Title(s string) string {
- // Use a closure here to remember state.
- // Hackish but effective. Depends on Map scanning in order and calling
- // the closure once per rune.
- prev := ' '
- return Map(
- func(r rune) rune {
- if isSeparator(prev) {
- prev = r
- return unicode.ToTitle(r)
- }
- prev = r
- return r
- },
- s)
-}
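
A minimal sketch of the word-boundary rule Title applies via isSeparator: a word starts only after a separator rune, and letters, digits, and underscore are not separators (illustrative inputs):

package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Println(strings.Title("her royal highness")) // Her Royal Highness
	fmt.Println(strings.Title("with_underscore"))    // With_underscore
	fmt.Println(strings.Title("123abc"))             // 123abc
}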
-
-// TrimLeftFunc returns a slice of the string s with all leading
-// Unicode code points c satisfying f(c) removed.
-func TrimLeftFunc(s string, f func(rune) bool) string {
- i := indexFunc(s, f, false)
- if i == -1 {
- return ""
- }
- return s[i:]
-}
-
-// TrimRightFunc returns a slice of the string s with all trailing
-// Unicode code points c satisfying f(c) removed.
-func TrimRightFunc(s string, f func(rune) bool) string {
- i := lastIndexFunc(s, f, false)
- if i >= 0 && s[i] >= utf8.RuneSelf {
- _, wid := utf8.DecodeRuneInString(s[i:])
- i += wid
- } else {
- i++
- }
- return s[0:i]
-}
-
-// TrimFunc returns a slice of the string s with all leading
-// and trailing Unicode code points c satisfying f(c) removed.
-func TrimFunc(s string, f func(rune) bool) string {
- return TrimRightFunc(TrimLeftFunc(s, f), f)
-}
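
A minimal sketch of the *Func trimmers, using unicode.IsDigit as the predicate (illustrative inputs):

package main

import (
	"fmt"
	"strings"
	"unicode"
)

func main() {
	fmt.Println(strings.TrimLeftFunc("123abc456", unicode.IsDigit))  // abc456
	fmt.Println(strings.TrimRightFunc("123abc456", unicode.IsDigit)) // 123abc
	fmt.Println(strings.TrimFunc("123abc456", unicode.IsDigit))      // abc
}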
-
-// IndexFunc returns the index into s of the first Unicode
-// code point satisfying f(c), or -1 if none do.
-func IndexFunc(s string, f func(rune) bool) int {
- return indexFunc(s, f, true)
-}
-
-// LastIndexFunc returns the index into s of the last
-// Unicode code point satisfying f(c), or -1 if none do.
-func LastIndexFunc(s string, f func(rune) bool) int {
- return lastIndexFunc(s, f, true)
-}
-
-// indexFunc is the same as IndexFunc except that if
-// truth==false, the sense of the predicate function is
-// inverted.
-func indexFunc(s string, f func(rune) bool, truth bool) int {
- start := 0
- for start < len(s) {
- wid := 1
- r := rune(s[start])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRuneInString(s[start:])
- }
- if f(r) == truth {
- return start
- }
- start += wid
- }
- return -1
-}
-
-// lastIndexFunc is the same as LastIndexFunc except that if
-// truth==false, the sense of the predicate function is
-// inverted.
-func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
- for i := len(s); i > 0; {
- r, size := utf8.DecodeLastRuneInString(s[0:i])
- i -= size
- if f(r) == truth {
- return i
- }
- }
- return -1
-}
-
-func makeCutsetFunc(cutset string) func(rune) bool {
- return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
-}
-
-// Trim returns a slice of the string s with all leading and
-// trailing Unicode code points contained in cutset removed.
-func Trim(s string, cutset string) string {
- if s == "" || cutset == "" {
- return s
- }
- return TrimFunc(s, makeCutsetFunc(cutset))
-}
-
-// TrimLeft returns a slice of the string s with all leading
-// Unicode code points contained in cutset removed.
-func TrimLeft(s string, cutset string) string {
- if s == "" || cutset == "" {
- return s
- }
- return TrimLeftFunc(s, makeCutsetFunc(cutset))
-}
-
-// TrimRight returns a slice of the string s, with all trailing
-// Unicode code points contained in cutset removed.
-func TrimRight(s string, cutset string) string {
- if s == "" || cutset == "" {
- return s
- }
- return TrimRightFunc(s, makeCutsetFunc(cutset))
-}
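
A minimal sketch of the cutset-based trimmers: cutset is a set of code points to strip, not a prefix or suffix string (illustrative inputs):

package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Printf("%q\n", strings.Trim("xxyy hello yyxx", "xy"))  // " hello "
	fmt.Printf("%q\n", strings.Trim("xxyy hello yyxx", "xy ")) // "hello"
	fmt.Printf("%q\n", strings.TrimRight("hello...!!", ".!"))  // "hello"
}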
-
-// TrimSpace returns a slice of the string s, with all leading
-// and trailing white space removed, as defined by Unicode.
-func TrimSpace(s string) string {
- return TrimFunc(s, unicode.IsSpace)
-}
-
-// TrimPrefix returns s without the provided leading prefix string.
-// If s doesn't start with prefix, s is returned unchanged.
-func TrimPrefix(s, prefix string) string {
- if HasPrefix(s, prefix) {
- return s[len(prefix):]
- }
- return s
-}
-
-// TrimSuffix returns s without the provided trailing suffix string.
-// If s doesn't end with suffix, s is returned unchanged.
-func TrimSuffix(s, suffix string) string {
- if HasSuffix(s, suffix) {
- return s[:len(s)-len(suffix)]
- }
- return s
-}
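
A minimal sketch contrasting TrimPrefix and TrimSuffix, which remove one exact string at most once, with the cutset-based Trim functions above (illustrative inputs):

package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Println(strings.TrimPrefix("foo.bar.go", "foo.")) // bar.go
	fmt.Println(strings.TrimSuffix("foo.bar.go", ".go"))  // foo.bar
	fmt.Println(strings.TrimSuffix("foo.bar.go", ".txt")) // foo.bar.go (unchanged)
}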
-
-// Replace returns a copy of the string s with the first n
-// non-overlapping instances of old replaced by new.
-// If n < 0, there is no limit on the number of replacements.
-func Replace(s, old, new string, n int) string {
- if old == new || n == 0 {
- return s // avoid allocation
- }
-
- // Compute number of replacements.
- if m := Count(s, old); m == 0 {
- return s // avoid allocation
- } else if n < 0 || m < n {
- n = m
- }
-
- // Apply replacements to buffer.
- t := make([]byte, len(s)+n*(len(new)-len(old)))
- w := 0
- start := 0
- for i := 0; i < n; i++ {
- j := start
- if len(old) == 0 {
- if i > 0 {
- _, wid := utf8.DecodeRuneInString(s[start:])
- j += wid
- }
- } else {
- j += Index(s[start:], old)
- }
- w += copy(t[w:], s[start:j])
- w += copy(t[w:], new)
- start = j + len(old)
- }
- w += copy(t[w:], s[start:])
- return string(t[0:w])
-}
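
A minimal sketch of the empty-old code path above: when old is empty, new is inserted before each UTF-8 sequence, and also at the end when the replacement count allows it (illustrative inputs):

package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Println(strings.Replace("ab", "", "-", -1))  // -a-b-
	fmt.Println(strings.Replace("ab", "", "-", 2))   // -a-b
	fmt.Println(strings.Replace("aaa", "a", "b", 2)) // bba
}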
-
-// EqualFold reports whether s and t, interpreted as UTF-8 strings,
-// are equal under Unicode case-folding.
-func EqualFold(s, t string) bool {
- for s != "" && t != "" {
- // Extract first rune from each string.
- var sr, tr rune
- if s[0] < utf8.RuneSelf {
- sr, s = rune(s[0]), s[1:]
- } else {
- r, size := utf8.DecodeRuneInString(s)
- sr, s = r, s[size:]
- }
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRuneInString(t)
- tr, t = r, t[size:]
- }
-
- // If they match, keep going; if not, return false.
-
- // Easy case.
- if tr == sr {
- continue
- }
-
- // Make sr < tr to simplify what follows.
- if tr < sr {
- tr, sr = sr, tr
- }
- // Fast check for ASCII.
- if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
- // ASCII, and sr is upper case. tr must be lower case.
- if tr == sr+'a'-'A' {
- continue
- }
- return false
- }
-
- // General case. SimpleFold(x) returns the next equivalent rune > x
- // or wraps around to smaller values.
- r := unicode.SimpleFold(sr)
- for r != sr && r < tr {
- r = unicode.SimpleFold(r)
- }
- if r == tr {
- continue
- }
- return false
- }
-
- // One string is empty. Are both?
- return s == t
-}
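
A minimal sketch of the general-case folding loop above: unicode.SimpleFold steps through the runes equivalent under simple case folding, wrapping around each cycle, which is how 'K' and the Kelvin sign U+212A compare equal (illustrative inputs):

package main

import (
	"fmt"
	"strings"
	"unicode"
)

func main() {
	fmt.Println(strings.EqualFold("Kelvin", "\u212Aelvin")) // true
	fmt.Printf("%#U\n", unicode.SimpleFold('A'))            // U+0061 'a'
	fmt.Printf("%#U\n", unicode.SimpleFold('a'))            // U+0041 'A'
}
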
diff --git a/src/pkg/strings/strings.s b/src/pkg/strings/strings.s
deleted file mode 100644
index 55103bae0..000000000
--- a/src/pkg/strings/strings.s
+++ /dev/null
@@ -1,5 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file is here just to make the go tool happy.
diff --git a/src/pkg/strings/strings_decl.go b/src/pkg/strings/strings_decl.go
deleted file mode 100644
index 810a696af..000000000
--- a/src/pkg/strings/strings_decl.go
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
-func IndexByte(s string, c byte) int // ../runtime/asm_$GOARCH.s
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
deleted file mode 100644
index e40a18015..000000000
--- a/src/pkg/strings/strings_test.go
+++ /dev/null
@@ -1,1176 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings_test
-
-import (
- "bytes"
- "io"
- "math/rand"
- "reflect"
- . "strings"
- "testing"
- "unicode"
- "unicode/utf8"
- "unsafe"
-)
-
-func eq(a, b []string) bool {
- if len(a) != len(b) {
- return false
- }
- for i := 0; i < len(a); i++ {
- if a[i] != b[i] {
- return false
- }
- }
- return true
-}
-
-var abcd = "abcd"
-var faces = "☺☻☹"
-var commas = "1,2,3,4"
-var dots = "1....2....3....4"
-
-type IndexTest struct {
- s string
- sep string
- out int
-}
-
-var indexTests = []IndexTest{
- {"", "", 0},
- {"", "a", -1},
- {"", "foo", -1},
- {"fo", "foo", -1},
- {"foo", "foo", 0},
- {"oofofoofooo", "f", 2},
- {"oofofoofooo", "foo", 4},
- {"barfoobarfoo", "foo", 3},
- {"foo", "", 0},
- {"foo", "o", 1},
- {"abcABCabc", "A", 3},
- // cases with one byte strings - test special case in Index()
- {"", "a", -1},
- {"x", "a", -1},
- {"x", "x", 0},
- {"abc", "a", 0},
- {"abc", "b", 1},
- {"abc", "c", 2},
- {"abc", "x", -1},
-}
-
-var lastIndexTests = []IndexTest{
- {"", "", 0},
- {"", "a", -1},
- {"", "foo", -1},
- {"fo", "foo", -1},
- {"foo", "foo", 0},
- {"foo", "f", 0},
- {"oofofoofooo", "f", 7},
- {"oofofoofooo", "foo", 7},
- {"barfoobarfoo", "foo", 9},
- {"foo", "", 3},
- {"foo", "o", 2},
- {"abcABCabc", "A", 3},
- {"abcABCabc", "a", 6},
-}
-
-var indexAnyTests = []IndexTest{
- {"", "", -1},
- {"", "a", -1},
- {"", "abc", -1},
- {"a", "", -1},
- {"a", "a", 0},
- {"aaa", "a", 0},
- {"abc", "xyz", -1},
- {"abc", "xcz", 2},
- {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
- {"aRegExp*", ".(|)*+?^$[]", 7},
- {dots + dots + dots, " ", -1},
-}
-var lastIndexAnyTests = []IndexTest{
- {"", "", -1},
- {"", "a", -1},
- {"", "abc", -1},
- {"a", "", -1},
- {"a", "a", 0},
- {"aaa", "a", 2},
- {"abc", "xyz", -1},
- {"abc", "ab", 1},
- {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
- {"a.RegExp*", ".(|)*+?^$[]", 8},
- {dots + dots + dots, " ", -1},
-}
-
-// Execute f on each test case. funcName should be the name of f; it's used
-// in failure reports.
-func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
- for _, test := range testCases {
- actual := f(test.s, test.sep)
- if actual != test.out {
- t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
- }
- }
-}
-
-func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
-func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
-func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
-func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) }
-
-var indexRuneTests = []struct {
- s string
- rune rune
- out int
-}{
- {"a A x", 'A', 2},
- {"some_text=some_value", '=', 9},
- {"☺a", 'a', 3},
- {"a☻☺b", '☺', 4},
-}
-
-func TestIndexRune(t *testing.T) {
- for _, test := range indexRuneTests {
- if actual := IndexRune(test.s, test.rune); actual != test.out {
- t.Errorf("IndexRune(%q,%d)= %v; want %v", test.s, test.rune, actual, test.out)
- }
- }
-}
-
-const benchmarkString = "some_text=some☺value"
-
-func BenchmarkIndexRune(b *testing.B) {
- if got := IndexRune(benchmarkString, '☺'); got != 14 {
- b.Fatalf("wrong index: expected 14, got=%d", got)
- }
- for i := 0; i < b.N; i++ {
- IndexRune(benchmarkString, '☺')
- }
-}
-
-func BenchmarkIndexRuneFastPath(b *testing.B) {
- if got := IndexRune(benchmarkString, 'v'); got != 17 {
- b.Fatalf("wrong index: expected 17, got=%d", got)
- }
- for i := 0; i < b.N; i++ {
- IndexRune(benchmarkString, 'v')
- }
-}
-
-func BenchmarkIndex(b *testing.B) {
- if got := Index(benchmarkString, "v"); got != 17 {
- b.Fatalf("wrong index: expected 17, got=%d", got)
- }
- for i := 0; i < b.N; i++ {
- Index(benchmarkString, "v")
- }
-}
-
-func BenchmarkIndexByte(b *testing.B) {
- if got := IndexByte(benchmarkString, 'v'); got != 17 {
- b.Fatalf("wrong index: expected 17, got=%d", got)
- }
- for i := 0; i < b.N; i++ {
- IndexByte(benchmarkString, 'v')
- }
-}
-
-var explodetests = []struct {
- s string
- n int
- a []string
-}{
- {"", -1, []string{}},
- {abcd, 4, []string{"a", "b", "c", "d"}},
- {faces, 3, []string{"☺", "☻", "☹"}},
- {abcd, 2, []string{"a", "bcd"}},
-}
-
-func TestExplode(t *testing.T) {
- for _, tt := range explodetests {
- a := SplitN(tt.s, "", tt.n)
- if !eq(a, tt.a) {
- t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a)
- continue
- }
- s := Join(a, "")
- if s != tt.s {
- t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s)
- }
- }
-}
-
-type SplitTest struct {
- s string
- sep string
- n int
- a []string
-}
-
-var splittests = []SplitTest{
- {abcd, "a", 0, nil},
- {abcd, "a", -1, []string{"", "bcd"}},
- {abcd, "z", -1, []string{"abcd"}},
- {abcd, "", -1, []string{"a", "b", "c", "d"}},
- {commas, ",", -1, []string{"1", "2", "3", "4"}},
- {dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
- {faces, "☹", -1, []string{"☺☻", ""}},
- {faces, "~", -1, []string{faces}},
- {faces, "", -1, []string{"☺", "☻", "☹"}},
- {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
- {"1 2", " ", 3, []string{"1", "2"}},
- {"123", "", 2, []string{"1", "23"}},
- {"123", "", 17, []string{"1", "2", "3"}},
-}
-
-func TestSplit(t *testing.T) {
- for _, tt := range splittests {
- a := SplitN(tt.s, tt.sep, tt.n)
- if !eq(a, tt.a) {
- t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
- continue
- }
- if tt.n == 0 {
- continue
- }
- s := Join(a, tt.sep)
- if s != tt.s {
- t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
- }
- if tt.n < 0 {
- b := Split(tt.s, tt.sep)
- if !reflect.DeepEqual(a, b) {
- t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
- }
- }
- }
-}
-
-var splitaftertests = []SplitTest{
- {abcd, "a", -1, []string{"a", "bcd"}},
- {abcd, "z", -1, []string{"abcd"}},
- {abcd, "", -1, []string{"a", "b", "c", "d"}},
- {commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
- {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
- {faces, "☹", -1, []string{"☺☻☹", ""}},
- {faces, "~", -1, []string{faces}},
- {faces, "", -1, []string{"☺", "☻", "☹"}},
- {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
- {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
- {"1 2", " ", 3, []string{"1 ", "2"}},
- {"123", "", 2, []string{"1", "23"}},
- {"123", "", 17, []string{"1", "2", "3"}},
-}
-
-func TestSplitAfter(t *testing.T) {
- for _, tt := range splitaftertests {
- a := SplitAfterN(tt.s, tt.sep, tt.n)
- if !eq(a, tt.a) {
- t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
- continue
- }
- s := Join(a, "")
- if s != tt.s {
- t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
- }
- if tt.n < 0 {
- b := SplitAfter(tt.s, tt.sep)
- if !reflect.DeepEqual(a, b) {
- t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
- }
- }
- }
-}
-
-type FieldsTest struct {
- s string
- a []string
-}
-
-var fieldstests = []FieldsTest{
- {"", []string{}},
- {" ", []string{}},
- {" \t ", []string{}},
- {" abc ", []string{"abc"}},
- {"1 2 3 4", []string{"1", "2", "3", "4"}},
- {"1 2 3 4", []string{"1", "2", "3", "4"}},
- {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
- {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
- {"\u2000\u2001\u2002", []string{}},
- {"\n™\t™\n", []string{"™", "™"}},
- {faces, []string{faces}},
-}
-
-func TestFields(t *testing.T) {
- for _, tt := range fieldstests {
- a := Fields(tt.s)
- if !eq(a, tt.a) {
- t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
- continue
- }
- }
-}
-
-var FieldsFuncTests = []FieldsTest{
- {"", []string{}},
- {"XX", []string{}},
- {"XXhiXXX", []string{"hi"}},
- {"aXXbXXXcX", []string{"a", "b", "c"}},
-}
-
-func TestFieldsFunc(t *testing.T) {
- for _, tt := range fieldstests {
- a := FieldsFunc(tt.s, unicode.IsSpace)
- if !eq(a, tt.a) {
- t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
- continue
- }
- }
- pred := func(c rune) bool { return c == 'X' }
- for _, tt := range FieldsFuncTests {
- a := FieldsFunc(tt.s, pred)
- if !eq(a, tt.a) {
- t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
- }
- }
-}
-
-// Test case for any function which accepts and returns a single string.
-type StringTest struct {
- in, out string
-}
-
-// Execute f on each test case. funcName should be the name of f; it's used
-// in failure reports.
-func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
- for _, tc := range testCases {
- actual := f(tc.in)
- if actual != tc.out {
- t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
- }
- }
-}
-
-var upperTests = []StringTest{
- {"", ""},
- {"abc", "ABC"},
- {"AbC123", "ABC123"},
- {"azAZ09_", "AZAZ09_"},
- {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
-}
-
-var lowerTests = []StringTest{
- {"", ""},
- {"abc", "abc"},
- {"AbC123", "abc123"},
- {"azAZ09_", "azaz09_"},
- {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
-}
-
-const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
-
-var trimSpaceTests = []StringTest{
- {"", ""},
- {"abc", "abc"},
- {space + "abc" + space, "abc"},
- {" ", ""},
- {" \t\r\n \t\t\r\r\n\n ", ""},
- {" \t\r\n x\t\t\r\r\n\n ", "x"},
- {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
- {"1 \t\r\n2", "1 \t\r\n2"},
- {" x\x80", "x\x80"},
- {" x\xc0", "x\xc0"},
- {"x \xc0\xc0 ", "x \xc0\xc0"},
- {"x \xc0", "x \xc0"},
- {"x \xc0 ", "x \xc0"},
- {"x \xc0\xc0 ", "x \xc0\xc0"},
- {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
- {"x ☺ ", "x ☺"},
-}
-
-func tenRunes(ch rune) string {
- r := make([]rune, 10)
- for i := range r {
- r[i] = ch
- }
- return string(r)
-}
-
-// User-defined self-inverse mapping function
-func rot13(r rune) rune {
- step := rune(13)
- if r >= 'a' && r <= 'z' {
- return ((r - 'a' + step) % 26) + 'a'
- }
- if r >= 'A' && r <= 'Z' {
- return ((r - 'A' + step) % 26) + 'A'
- }
- return r
-}
-
-func TestMap(t *testing.T) {
- // Run a couple of awful growth/shrinkage tests
- a := tenRunes('a')
- // 1. Grow. This triggers two reallocations in Map.
- maxRune := func(rune) rune { return unicode.MaxRune }
- m := Map(maxRune, a)
- expect := tenRunes(unicode.MaxRune)
- if m != expect {
- t.Errorf("growing: expected %q got %q", expect, m)
- }
-
- // 2. Shrink
- minRune := func(rune) rune { return 'a' }
- m = Map(minRune, tenRunes(unicode.MaxRune))
- expect = a
- if m != expect {
- t.Errorf("shrinking: expected %q got %q", expect, m)
- }
-
- // 3. Rot13
- m = Map(rot13, "a to zed")
- expect = "n gb mrq"
- if m != expect {
- t.Errorf("rot13: expected %q got %q", expect, m)
- }
-
- // 4. Rot13^2
- m = Map(rot13, Map(rot13, "a to zed"))
- expect = "a to zed"
- if m != expect {
- t.Errorf("rot13: expected %q got %q", expect, m)
- }
-
- // 5. Drop
- dropNotLatin := func(r rune) rune {
- if unicode.Is(unicode.Latin, r) {
- return r
- }
- return -1
- }
- m = Map(dropNotLatin, "Hello, 세계")
- expect = "Hello"
- if m != expect {
- t.Errorf("drop: expected %q got %q", expect, m)
- }
-
- // 6. Identity
- identity := func(r rune) rune {
- return r
- }
- orig := "Input string that we expect not to be copied."
- m = Map(identity, orig)
- if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data !=
- (*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
- t.Error("unexpected copy during identity map")
- }
-}
-
-func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
-
-func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
-
-func BenchmarkMapNoChanges(b *testing.B) {
- identity := func(r rune) rune {
- return r
- }
- for i := 0; i < b.N; i++ {
- Map(identity, "Some string that won't be modified.")
- }
-}
-
-func TestSpecialCase(t *testing.T) {
- lower := "abcçdefgğhıijklmnoöprsştuüvyz"
- upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
- u := ToUpperSpecial(unicode.TurkishCase, upper)
- if u != upper {
- t.Errorf("Upper(upper) is %s not %s", u, upper)
- }
- u = ToUpperSpecial(unicode.TurkishCase, lower)
- if u != upper {
- t.Errorf("Upper(lower) is %s not %s", u, upper)
- }
- l := ToLowerSpecial(unicode.TurkishCase, lower)
- if l != lower {
- t.Errorf("Lower(lower) is %s not %s", l, lower)
- }
- l = ToLowerSpecial(unicode.TurkishCase, upper)
- if l != lower {
- t.Errorf("Lower(upper) is %s not %s", l, lower)
- }
-}
-
-func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
-
-var trimTests = []struct {
- f string
- in, arg, out string
-}{
- {"Trim", "abba", "a", "bb"},
- {"Trim", "abba", "ab", ""},
- {"TrimLeft", "abba", "ab", ""},
- {"TrimRight", "abba", "ab", ""},
- {"TrimLeft", "abba", "a", "bba"},
- {"TrimRight", "abba", "a", "abb"},
- {"Trim", "<tag>", "<>", "tag"},
- {"Trim", "* listitem", " *", "listitem"},
- {"Trim", `"quote"`, `"`, "quote"},
- {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
- //empty string tests
- {"Trim", "abba", "", "abba"},
- {"Trim", "", "123", ""},
- {"Trim", "", "", ""},
- {"TrimLeft", "abba", "", "abba"},
- {"TrimLeft", "", "123", ""},
- {"TrimLeft", "", "", ""},
- {"TrimRight", "abba", "", "abba"},
- {"TrimRight", "", "123", ""},
- {"TrimRight", "", "", ""},
- {"TrimRight", "☺\xc0", "☺", "☺\xc0"},
- {"TrimPrefix", "aabb", "a", "abb"},
- {"TrimPrefix", "aabb", "b", "aabb"},
- {"TrimSuffix", "aabb", "a", "aabb"},
- {"TrimSuffix", "aabb", "b", "aab"},
-}
-
-func TestTrim(t *testing.T) {
- for _, tc := range trimTests {
- name := tc.f
- var f func(string, string) string
- switch name {
- case "Trim":
- f = Trim
- case "TrimLeft":
- f = TrimLeft
- case "TrimRight":
- f = TrimRight
- case "TrimPrefix":
- f = TrimPrefix
- case "TrimSuffix":
- f = TrimSuffix
- default:
- t.Errorf("Undefined trim function %s", name)
- }
- actual := f(tc.in, tc.arg)
- if actual != tc.out {
- t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
- }
- }
-}
-
-type predicate struct {
- f func(rune) bool
- name string
-}
-
-var isSpace = predicate{unicode.IsSpace, "IsSpace"}
-var isDigit = predicate{unicode.IsDigit, "IsDigit"}
-var isUpper = predicate{unicode.IsUpper, "IsUpper"}
-var isValidRune = predicate{
- func(r rune) bool {
- return r != utf8.RuneError
- },
- "IsValidRune",
-}
-
-func not(p predicate) predicate {
- return predicate{
- func(r rune) bool {
- return !p.f(r)
- },
- "not " + p.name,
- }
-}
-
-var trimFuncTests = []struct {
- f predicate
- in, out string
-}{
- {isSpace, space + " hello " + space, "hello"},
- {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
- {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
- {not(isSpace), "hello" + space + "hello", space},
- {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
- {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
- {not(isValidRune), "\xc0a\xc0", "a"},
-}
-
-func TestTrimFunc(t *testing.T) {
- for _, tc := range trimFuncTests {
- actual := TrimFunc(tc.in, tc.f.f)
- if actual != tc.out {
- t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
- }
- }
-}
-
-var indexFuncTests = []struct {
- in string
- f predicate
- first, last int
-}{
- {"", isValidRune, -1, -1},
- {"abc", isDigit, -1, -1},
- {"0123", isDigit, 0, 3},
- {"a1b", isDigit, 1, 1},
- {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
- {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
- {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
- {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
-
- // tests of invalid UTF-8
- {"\x801", isDigit, 1, 1},
- {"\x80abc", isDigit, -1, -1},
- {"\xc0a\xc0", isValidRune, 1, 1},
- {"\xc0a\xc0", not(isValidRune), 0, 2},
- {"\xc0☺\xc0", not(isValidRune), 0, 4},
- {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
- {"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
- {"a\xe0\x80cd", not(isValidRune), 1, 2},
- {"\x80\x80\x80\x80", not(isValidRune), 0, 3},
-}
-
-func TestIndexFunc(t *testing.T) {
- for _, tc := range indexFuncTests {
- first := IndexFunc(tc.in, tc.f.f)
- if first != tc.first {
- t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
- }
- last := LastIndexFunc(tc.in, tc.f.f)
- if last != tc.last {
- t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
- }
- }
-}
-
-func equal(m string, s1, s2 string, t *testing.T) bool {
- if s1 == s2 {
- return true
- }
- e1 := Split(s1, "")
- e2 := Split(s2, "")
- for i, c1 := range e1 {
- if i >= len(e2) {
- break
- }
- r1, _ := utf8.DecodeRuneInString(c1)
- r2, _ := utf8.DecodeRuneInString(e2[i])
- if r1 != r2 {
- t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
- }
- }
- return false
-}
-
-func TestCaseConsistency(t *testing.T) {
- // Make a string of all the runes.
- numRunes := int(unicode.MaxRune + 1)
- if testing.Short() {
- numRunes = 1000
- }
- a := make([]rune, numRunes)
- for i := range a {
- a[i] = rune(i)
- }
- s := string(a)
- // convert the cases.
- upper := ToUpper(s)
- lower := ToLower(s)
-
- // Consistency checks
- if n := utf8.RuneCountInString(upper); n != numRunes {
- t.Error("rune count wrong in upper:", n)
- }
- if n := utf8.RuneCountInString(lower); n != numRunes {
- t.Error("rune count wrong in lower:", n)
- }
- if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
- t.Error("ToUpper(upper) consistency fail")
- }
- if !equal("ToLower(lower)", ToLower(lower), lower, t) {
- t.Error("ToLower(lower) consistency fail")
- }
- /*
- These fail because of non-one-to-oneness of the data, such as multiple
- upper case 'I' mapping to 'i'. We comment them out but keep them for
- interest.
- For instance: CAPITAL LETTER I WITH DOT ABOVE:
- unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
-
- if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
- t.Error("ToUpper(lower) consistency fail");
- }
- if !equal("ToLower(upper)", ToLower(upper), lower, t) {
- t.Error("ToLower(upper) consistency fail");
- }
- */
-}
-
-var RepeatTests = []struct {
- in, out string
- count int
-}{
- {"", "", 0},
- {"", "", 1},
- {"", "", 2},
- {"-", "", 0},
- {"-", "-", 1},
- {"-", "----------", 10},
- {"abc ", "abc abc abc ", 3},
-}
-
-func TestRepeat(t *testing.T) {
- for _, tt := range RepeatTests {
- a := Repeat(tt.in, tt.count)
- if !equal("Repeat(s)", a, tt.out, t) {
- t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
- continue
- }
- }
-}
-
-func runesEqual(a, b []rune) bool {
- if len(a) != len(b) {
- return false
- }
- for i, r := range a {
- if r != b[i] {
- return false
- }
- }
- return true
-}
-
-var RunesTests = []struct {
- in string
- out []rune
- lossy bool
-}{
- {"", []rune{}, false},
- {" ", []rune{32}, false},
- {"ABC", []rune{65, 66, 67}, false},
- {"abc", []rune{97, 98, 99}, false},
- {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
- {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
- {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
-}
-
-func TestRunes(t *testing.T) {
- for _, tt := range RunesTests {
- a := []rune(tt.in)
- if !runesEqual(a, tt.out) {
- t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
- continue
- }
- if !tt.lossy {
- // can only test reassembly if we didn't lose information
- s := string(a)
- if s != tt.in {
- t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
- }
- }
- }
-}
-
-func TestReadByte(t *testing.T) {
- testStrings := []string{"", abcd, faces, commas}
- for _, s := range testStrings {
- reader := NewReader(s)
- if e := reader.UnreadByte(); e == nil {
- t.Errorf("Unreading %q at beginning: expected error", s)
- }
- var res bytes.Buffer
- for {
- b, e := reader.ReadByte()
- if e == io.EOF {
- break
- }
- if e != nil {
- t.Errorf("Reading %q: %s", s, e)
- break
- }
- res.WriteByte(b)
- // unread and read again
- e = reader.UnreadByte()
- if e != nil {
- t.Errorf("Unreading %q: %s", s, e)
- break
- }
- b1, e := reader.ReadByte()
- if e != nil {
- t.Errorf("Reading %q after unreading: %s", s, e)
- break
- }
- if b1 != b {
- t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
- break
- }
- }
- if res.String() != s {
- t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
- }
- }
-}
-
-func TestReadRune(t *testing.T) {
- testStrings := []string{"", abcd, faces, commas}
- for _, s := range testStrings {
- reader := NewReader(s)
- if e := reader.UnreadRune(); e == nil {
- t.Errorf("Unreading %q at beginning: expected error", s)
- }
- res := ""
- for {
- r, z, e := reader.ReadRune()
- if e == io.EOF {
- break
- }
- if e != nil {
- t.Errorf("Reading %q: %s", s, e)
- break
- }
- res += string(r)
- // unread and read again
- e = reader.UnreadRune()
- if e != nil {
- t.Errorf("Unreading %q: %s", s, e)
- break
- }
- r1, z1, e := reader.ReadRune()
- if e != nil {
- t.Errorf("Reading %q after unreading: %s", s, e)
- break
- }
- if r1 != r {
- t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
- break
- }
- if z1 != z {
- t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
- break
- }
- }
- if res != s {
- t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
- }
- }
-}
-
-var UnreadRuneErrorTests = []struct {
- name string
- f func(*Reader)
-}{
- {"Read", func(r *Reader) { r.Read([]byte{0}) }},
- {"ReadByte", func(r *Reader) { r.ReadByte() }},
- {"UnreadRune", func(r *Reader) { r.UnreadRune() }},
- {"Seek", func(r *Reader) { r.Seek(0, 1) }},
- {"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
-}
-
-func TestUnreadRuneError(t *testing.T) {
- for _, tt := range UnreadRuneErrorTests {
- reader := NewReader("0123456789")
- if _, _, err := reader.ReadRune(); err != nil {
- // should not happen
- t.Fatal(err)
- }
- tt.f(reader)
- err := reader.UnreadRune()
- if err == nil {
- t.Errorf("Unreading after %s: expected error", tt.name)
- }
- }
-}
-
-var ReplaceTests = []struct {
- in string
- old, new string
- n int
- out string
-}{
- {"hello", "l", "L", 0, "hello"},
- {"hello", "l", "L", -1, "heLLo"},
- {"hello", "x", "X", -1, "hello"},
- {"", "x", "X", -1, ""},
- {"radar", "r", "<r>", -1, "<r>ada<r>"},
- {"", "", "<>", -1, "<>"},
- {"banana", "a", "<>", -1, "b<>n<>n<>"},
- {"banana", "a", "<>", 1, "b<>nana"},
- {"banana", "a", "<>", 1000, "b<>n<>n<>"},
- {"banana", "an", "<>", -1, "b<><>a"},
- {"banana", "ana", "<>", -1, "b<>na"},
- {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
- {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
- {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
- {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
- {"banana", "", "<>", 1, "<>banana"},
- {"banana", "a", "a", -1, "banana"},
- {"banana", "a", "a", 1, "banana"},
- {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
-}
-
-func TestReplace(t *testing.T) {
- for _, tt := range ReplaceTests {
- if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
- t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
- }
- }
-}
-
-var TitleTests = []struct {
- in, out string
-}{
- {"", ""},
- {"a", "A"},
- {" aaa aaa aaa ", " Aaa Aaa Aaa "},
- {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
- {"123a456", "123a456"},
- {"double-blind", "Double-Blind"},
- {"ÿøû", "Ÿøû"},
- {"with_underscore", "With_underscore"},
- {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
-}
-
-func TestTitle(t *testing.T) {
- for _, tt := range TitleTests {
- if s := Title(tt.in); s != tt.out {
- t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
- }
- }
-}
-
-var ContainsTests = []struct {
- str, substr string
- expected bool
-}{
- {"abc", "bc", true},
- {"abc", "bcd", false},
- {"abc", "", true},
- {"", "a", false},
-}
-
-func TestContains(t *testing.T) {
- for _, ct := range ContainsTests {
- if Contains(ct.str, ct.substr) != ct.expected {
- t.Errorf("Contains(%s, %s) = %v, want %v",
- ct.str, ct.substr, !ct.expected, ct.expected)
- }
- }
-}
-
-var ContainsAnyTests = []struct {
- str, substr string
- expected bool
-}{
- {"", "", false},
- {"", "a", false},
- {"", "abc", false},
- {"a", "", false},
- {"a", "a", true},
- {"aaa", "a", true},
- {"abc", "xyz", false},
- {"abc", "xcz", true},
- {"a☺b☻c☹d", "uvw☻xyz", true},
- {"aRegExp*", ".(|)*+?^$[]", true},
- {dots + dots + dots, " ", false},
-}
-
-func TestContainsAny(t *testing.T) {
- for _, ct := range ContainsAnyTests {
- if ContainsAny(ct.str, ct.substr) != ct.expected {
- t.Errorf("ContainsAny(%s, %s) = %v, want %v",
- ct.str, ct.substr, !ct.expected, ct.expected)
- }
- }
-}
-
-var ContainsRuneTests = []struct {
- str string
- r rune
- expected bool
-}{
- {"", 'a', false},
- {"a", 'a', true},
- {"aaa", 'a', true},
- {"abc", 'y', false},
- {"abc", 'c', true},
- {"a☺b☻c☹d", 'x', false},
- {"a☺b☻c☹d", '☻', true},
- {"aRegExp*", '*', true},
-}
-
-func TestContainsRune(t *testing.T) {
- for _, ct := range ContainsRuneTests {
- if ContainsRune(ct.str, ct.r) != ct.expected {
- t.Errorf("ContainsRune(%q, %q) = %v, want %v",
- ct.str, ct.r, !ct.expected, ct.expected)
- }
- }
-}
-
-var EqualFoldTests = []struct {
- s, t string
- out bool
-}{
- {"abc", "abc", true},
- {"ABcd", "ABcd", true},
- {"123abc", "123ABC", true},
- {"αβδ", "ΑΒΔ", true},
- {"abc", "xyz", false},
- {"abc", "XYZ", false},
- {"abcdefghijk", "abcdefghijX", false},
- {"abcdefghijk", "abcdefghij\u212A", true},
- {"abcdefghijK", "abcdefghij\u212A", true},
- {"abcdefghijkz", "abcdefghij\u212Ay", false},
- {"abcdefghijKz", "abcdefghij\u212Ay", false},
-}
-
-func TestEqualFold(t *testing.T) {
- for _, tt := range EqualFoldTests {
- if out := EqualFold(tt.s, tt.t); out != tt.out {
- t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
- }
- if out := EqualFold(tt.t, tt.s); out != tt.out {
- t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
- }
- }
-}
-
-var CountTests = []struct {
- s, sep string
- num int
-}{
- {"", "", 1},
- {"", "notempty", 0},
- {"notempty", "", 9},
- {"smaller", "not smaller", 0},
- {"12345678987654321", "6", 2},
- {"611161116", "6", 3},
- {"notequal", "NotEqual", 0},
- {"equal", "equal", 1},
- {"abc1231231123q", "123", 3},
- {"11111", "11", 2},
-}
-
-func TestCount(t *testing.T) {
- for _, tt := range CountTests {
- if num := Count(tt.s, tt.sep); num != tt.num {
- t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num)
- }
- }
-}
-
-func makeBenchInputHard() string {
- tokens := [...]string{
- "<a>", "<p>", "<b>", "<strong>",
- "</a>", "</p>", "</b>", "</strong>",
- "hello", "world",
- }
- x := make([]byte, 0, 1<<20)
- for len(x) < 1<<20 {
- i := rand.Intn(len(tokens))
- x = append(x, tokens[i]...)
- }
- return string(x)
-}
-
-var benchInputHard = makeBenchInputHard()
-
-func benchmarkIndexHard(b *testing.B, sep string) {
- for i := 0; i < b.N; i++ {
- Index(benchInputHard, sep)
- }
-}
-
-func benchmarkCountHard(b *testing.B, sep string) {
- for i := 0; i < b.N; i++ {
- Count(benchInputHard, sep)
- }
-}
-
-func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
-func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
-func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
-
-func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
-func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
-func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
-
-var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
-var benchNeedleTorture = Repeat("ABC", 1<<10+1)
-
-func BenchmarkIndexTorture(b *testing.B) {
- for i := 0; i < b.N; i++ {
- Index(benchInputTorture, benchNeedleTorture)
- }
-}
-
-func BenchmarkCountTorture(b *testing.B) {
- for i := 0; i < b.N; i++ {
- Count(benchInputTorture, benchNeedleTorture)
- }
-}
-
-func BenchmarkCountTortureOverlapping(b *testing.B) {
- A := Repeat("ABC", 1<<20)
- B := Repeat("ABC", 1<<10)
- for i := 0; i < b.N; i++ {
- Count(A, B)
- }
-}
-
-var makeFieldsInput = func() string {
- x := make([]byte, 1<<20)
- // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
- for i := range x {
- switch rand.Intn(10) {
- case 0:
- x[i] = ' '
- case 1:
- if i > 0 && x[i-1] == 'x' {
- copy(x[i-1:], "χ")
- break
- }
- fallthrough
- default:
- x[i] = 'x'
- }
- }
- return string(x)
-}
-
-var fieldsInput = makeFieldsInput()
-
-func BenchmarkFields(b *testing.B) {
- b.SetBytes(int64(len(fieldsInput)))
- for i := 0; i < b.N; i++ {
- Fields(fieldsInput)
- }
-}
-
-func BenchmarkFieldsFunc(b *testing.B) {
- b.SetBytes(int64(len(fieldsInput)))
- for i := 0; i < b.N; i++ {
- FieldsFunc(fieldsInput, unicode.IsSpace)
- }
-}
-
-func BenchmarkSplit1(b *testing.B) {
- for i := 0; i < b.N; i++ {
- Split(benchInputHard, "")
- }
-}
-
-func BenchmarkSplit2(b *testing.B) {
- for i := 0; i < b.N; i++ {
- Split(benchInputHard, "/")
- }
-}
-
-func BenchmarkSplit3(b *testing.B) {
- for i := 0; i < b.N; i++ {
- Split(benchInputHard, "hello")
- }
-}