diff options
Diffstat (limited to 'src/pkg/bytes/bytes.go')
-rw-r--r-- | src/pkg/bytes/bytes.go | 697 |
1 files changed, 0 insertions, 697 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go deleted file mode 100644 index 0c53e4c0b..000000000 --- a/src/pkg/bytes/bytes.go +++ /dev/null @@ -1,697 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package bytes implements functions for the manipulation of byte slices. -// It is analogous to the facilities of the strings package. -package bytes - -import ( - "unicode" - "unicode/utf8" -) - -func equalPortable(a, b []byte) bool { - if len(a) != len(b) { - return false - } - for i, c := range a { - if c != b[i] { - return false - } - } - return true -} - -// explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes), -// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. -func explode(s []byte, n int) [][]byte { - if n <= 0 { - n = len(s) - } - a := make([][]byte, n) - var size int - na := 0 - for len(s) > 0 { - if na+1 >= n { - a[na] = s - na++ - break - } - _, size = utf8.DecodeRune(s) - a[na] = s[0:size] - s = s[size:] - na++ - } - return a[0:na] -} - -// Count counts the number of non-overlapping instances of sep in s. -func Count(s, sep []byte) int { - n := len(sep) - if n == 0 { - return utf8.RuneCount(s) + 1 - } - if n > len(s) { - return 0 - } - count := 0 - c := sep[0] - i := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) - if o < 0 { - break - } - i += o - } - if n == 1 || Equal(s[i:i+n], sep) { - count++ - i += n - continue - } - i++ - } - return count -} - -// Contains reports whether subslice is within b. -func Contains(b, subslice []byte) bool { - return Index(b, subslice) != -1 -} - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - if n == 0 { - return 0 - } - if n > len(s) { - return -1 - } - c := sep[0] - if n == 1 { - return IndexByte(s, c) - } - i := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) - if o < 0 { - break - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - i++ - } - return -1 -} - -func indexBytePortable(s []byte, c byte) int { - for i, b := range s { - if b == c { - return i - } - } - return -1 -} - -// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. -func LastIndex(s, sep []byte) int { - n := len(sep) - if n == 0 { - return len(s) - } - c := sep[0] - for i := len(s) - n; i >= 0; i-- { - if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { - return i - } - } - return -1 -} - -// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. -// It returns the byte index of the first occurrence in s of the given rune. -// It returns -1 if rune is not present in s. -func IndexRune(s []byte, r rune) int { - for i := 0; i < len(s); { - r1, size := utf8.DecodeRune(s[i:]) - if r == r1 { - return i - } - i += size - } - return -1 -} - -// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. -// It returns the byte index of the first occurrence in s of any of the Unicode -// code points in chars. It returns -1 if chars is empty or if there is no code -// point in common. -func IndexAny(s []byte, chars string) int { - if len(chars) > 0 { - var r rune - var width int - for i := 0; i < len(s); i += width { - r = rune(s[i]) - if r < utf8.RuneSelf { - width = 1 - } else { - r, width = utf8.DecodeRune(s[i:]) - } - for _, ch := range chars { - if r == ch { - return i - } - } - } - } - return -1 -} - -// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code -// points. It returns the byte index of the last occurrence in s of any of -// the Unicode code points in chars. It returns -1 if chars is empty or if -// there is no code point in common. -func LastIndexAny(s []byte, chars string) int { - if len(chars) > 0 { - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRune(s[0:i]) - i -= size - for _, ch := range chars { - if r == ch { - return i - } - } - } - } - return -1 -} - -// Generic split: splits after each instance of sep, -// including sepSave bytes of sep in the subslices. -func genSplit(s, sep []byte, sepSave, n int) [][]byte { - if n == 0 { - return nil - } - if len(sep) == 0 { - return explode(s, n) - } - if n < 0 { - n = Count(s, sep) + 1 - } - c := sep[0] - start := 0 - a := make([][]byte, n) - na := 0 - for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { - if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { - a[na] = s[start : i+sepSave] - na++ - start = i + len(sep) - i += len(sep) - 1 - } - } - a[na] = s[start:] - return a[0 : na+1] -} - -// SplitN slices s into subslices separated by sep and returns a slice of -// the subslices between those separators. -// If sep is empty, SplitN splits after each UTF-8 sequence. -// The count determines the number of subslices to return: -// n > 0: at most n subslices; the last subslice will be the unsplit remainder. -// n == 0: the result is nil (zero subslices) -// n < 0: all subslices -func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } - -// SplitAfterN slices s into subslices after each instance of sep and -// returns a slice of those subslices. -// If sep is empty, SplitAfterN splits after each UTF-8 sequence. -// The count determines the number of subslices to return: -// n > 0: at most n subslices; the last subslice will be the unsplit remainder. -// n == 0: the result is nil (zero subslices) -// n < 0: all subslices -func SplitAfterN(s, sep []byte, n int) [][]byte { - return genSplit(s, sep, len(sep), n) -} - -// Split slices s into all subslices separated by sep and returns a slice of -// the subslices between those separators. -// If sep is empty, Split splits after each UTF-8 sequence. -// It is equivalent to SplitN with a count of -1. -func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } - -// SplitAfter slices s into all subslices after each instance of sep and -// returns a slice of those subslices. -// If sep is empty, SplitAfter splits after each UTF-8 sequence. -// It is equivalent to SplitAfterN with a count of -1. -func SplitAfter(s, sep []byte) [][]byte { - return genSplit(s, sep, len(sep), -1) -} - -// Fields splits the slice s around each instance of one or more consecutive white space -// characters, returning a slice of subslices of s or an empty list if s contains only white space. -func Fields(s []byte) [][]byte { - return FieldsFunc(s, unicode.IsSpace) -} - -// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. -// It splits the slice s at each run of code points c satisfying f(c) and -// returns a slice of subslices of s. If all code points in s satisfy f(c), or -// len(s) == 0, an empty slice is returned. -func FieldsFunc(s []byte, f func(rune) bool) [][]byte { - n := 0 - inField := false - for i := 0; i < len(s); { - r, size := utf8.DecodeRune(s[i:]) - wasInField := inField - inField = !f(r) - if inField && !wasInField { - n++ - } - i += size - } - - a := make([][]byte, n) - na := 0 - fieldStart := -1 - for i := 0; i <= len(s) && na < n; { - r, size := utf8.DecodeRune(s[i:]) - if fieldStart < 0 && size > 0 && !f(r) { - fieldStart = i - i += size - continue - } - if fieldStart >= 0 && (size == 0 || f(r)) { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 - } - if size == 0 { - break - } - i += size - } - return a[0:na] -} - -// Join concatenates the elements of s to create a new byte slice. The separator -// sep is placed between elements in the resulting slice. -func Join(s [][]byte, sep []byte) []byte { - if len(s) == 0 { - return []byte{} - } - if len(s) == 1 { - // Just return a copy. - return append([]byte(nil), s[0]...) - } - n := len(sep) * (len(s) - 1) - for _, v := range s { - n += len(v) - } - - b := make([]byte, n) - bp := copy(b, s[0]) - for _, v := range s[1:] { - bp += copy(b[bp:], sep) - bp += copy(b[bp:], v) - } - return b -} - -// HasPrefix tests whether the byte slice s begins with prefix. -func HasPrefix(s, prefix []byte) bool { - return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) -} - -// HasSuffix tests whether the byte slice s ends with suffix. -func HasSuffix(s, suffix []byte) bool { - return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) -} - -// Map returns a copy of the byte slice s with all its characters modified -// according to the mapping function. If mapping returns a negative value, the character is -// dropped from the string with no replacement. The characters in s and the -// output are interpreted as UTF-8-encoded Unicode code points. -func Map(mapping func(r rune) rune, s []byte) []byte { - // In the worst case, the slice can grow when mapped, making - // things unpleasant. But it's so rare we barge in assuming it's - // fine. It could also shrink but that falls out naturally. - maxbytes := len(s) // length of b - nbytes := 0 // number of bytes encoded in b - b := make([]byte, maxbytes) - for i := 0; i < len(s); { - wid := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[i:]) - } - r = mapping(r) - if r >= 0 { - rl := utf8.RuneLen(r) - if rl < 0 { - rl = len(string(utf8.RuneError)) - } - if nbytes+rl > maxbytes { - // Grow the buffer. - maxbytes = maxbytes*2 + utf8.UTFMax - nb := make([]byte, maxbytes) - copy(nb, b[0:nbytes]) - b = nb - } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) - } - i += wid - } - return b[0:nbytes] -} - -// Repeat returns a new byte slice consisting of count copies of b. -func Repeat(b []byte, count int) []byte { - nb := make([]byte, len(b)*count) - bp := 0 - for i := 0; i < count; i++ { - bp += copy(nb[bp:], b) - } - return nb -} - -// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. -func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } - -// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. -func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } - -// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. -func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } - -// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their -// upper case, giving priority to the special casing rules. -func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToUpper(r) }, s) -} - -// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their -// lower case, giving priority to the special casing rules. -func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToLower(r) }, s) -} - -// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their -// title case, giving priority to the special casing rules. -func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToTitle(r) }, s) -} - -// isSeparator reports whether the rune could mark a word boundary. -// TODO: update when package unicode captures more of the properties. -func isSeparator(r rune) bool { - // ASCII alphanumerics and underscore are not separators - if r <= 0x7F { - switch { - case '0' <= r && r <= '9': - return false - case 'a' <= r && r <= 'z': - return false - case 'A' <= r && r <= 'Z': - return false - case r == '_': - return false - } - return true - } - // Letters and digits are not separators - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return false - } - // Otherwise, all we can do for now is treat spaces as separators. - return unicode.IsSpace(r) -} - -// Title returns a copy of s with all Unicode letters that begin words -// mapped to their title case. -// -// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly. -func Title(s []byte) []byte { - // Use a closure here to remember state. - // Hackish but effective. Depends on Map scanning in order and calling - // the closure once per rune. - prev := ' ' - return Map( - func(r rune) rune { - if isSeparator(prev) { - prev = r - return unicode.ToTitle(r) - } - prev = r - return r - }, - s) -} - -// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded -// Unicode code points c that satisfy f(c). -func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { - i := indexFunc(s, f, false) - if i == -1 { - return nil - } - return s[i:] -} - -// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 -// encoded Unicode code points c that satisfy f(c). -func TrimRightFunc(s []byte, f func(r rune) bool) []byte { - i := lastIndexFunc(s, f, false) - if i >= 0 && s[i] >= utf8.RuneSelf { - _, wid := utf8.DecodeRune(s[i:]) - i += wid - } else { - i++ - } - return s[0:i] -} - -// TrimFunc returns a subslice of s by slicing off all leading and trailing -// UTF-8-encoded Unicode code points c that satisfy f(c). -func TrimFunc(s []byte, f func(r rune) bool) []byte { - return TrimRightFunc(TrimLeftFunc(s, f), f) -} - -// TrimPrefix returns s without the provided leading prefix string. -// If s doesn't start with prefix, s is returned unchanged. -func TrimPrefix(s, prefix []byte) []byte { - if HasPrefix(s, prefix) { - return s[len(prefix):] - } - return s -} - -// TrimSuffix returns s without the provided trailing suffix string. -// If s doesn't end with suffix, s is returned unchanged. -func TrimSuffix(s, suffix []byte) []byte { - if HasSuffix(s, suffix) { - return s[:len(s)-len(suffix)] - } - return s -} - -// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. -// It returns the byte index in s of the first Unicode -// code point satisfying f(c), or -1 if none do. -func IndexFunc(s []byte, f func(r rune) bool) int { - return indexFunc(s, f, true) -} - -// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. -// It returns the byte index in s of the last Unicode -// code point satisfying f(c), or -1 if none do. -func LastIndexFunc(s []byte, f func(r rune) bool) int { - return lastIndexFunc(s, f, true) -} - -// indexFunc is the same as IndexFunc except that if -// truth==false, the sense of the predicate function is -// inverted. -func indexFunc(s []byte, f func(r rune) bool, truth bool) int { - start := 0 - for start < len(s) { - wid := 1 - r := rune(s[start]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[start:]) - } - if f(r) == truth { - return start - } - start += wid - } - return -1 -} - -// lastIndexFunc is the same as LastIndexFunc except that if -// truth==false, the sense of the predicate function is -// inverted. -func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { - for i := len(s); i > 0; { - r, size := rune(s[i-1]), 1 - if r >= utf8.RuneSelf { - r, size = utf8.DecodeLastRune(s[0:i]) - } - i -= size - if f(r) == truth { - return i - } - } - return -1 -} - -func makeCutsetFunc(cutset string) func(r rune) bool { - return func(r rune) bool { - for _, c := range cutset { - if c == r { - return true - } - } - return false - } -} - -// Trim returns a subslice of s by slicing off all leading and -// trailing UTF-8-encoded Unicode code points contained in cutset. -func Trim(s []byte, cutset string) []byte { - return TrimFunc(s, makeCutsetFunc(cutset)) -} - -// TrimLeft returns a subslice of s by slicing off all leading -// UTF-8-encoded Unicode code points contained in cutset. -func TrimLeft(s []byte, cutset string) []byte { - return TrimLeftFunc(s, makeCutsetFunc(cutset)) -} - -// TrimRight returns a subslice of s by slicing off all trailing -// UTF-8-encoded Unicode code points that are contained in cutset. -func TrimRight(s []byte, cutset string) []byte { - return TrimRightFunc(s, makeCutsetFunc(cutset)) -} - -// TrimSpace returns a subslice of s by slicing off all leading and -// trailing white space, as defined by Unicode. -func TrimSpace(s []byte) []byte { - return TrimFunc(s, unicode.IsSpace) -} - -// Runes returns a slice of runes (Unicode code points) equivalent to s. -func Runes(s []byte) []rune { - t := make([]rune, utf8.RuneCount(s)) - i := 0 - for len(s) > 0 { - r, l := utf8.DecodeRune(s) - t[i] = r - i++ - s = s[l:] - } - return t -} - -// Replace returns a copy of the slice s with the first n -// non-overlapping instances of old replaced by new. -// If n < 0, there is no limit on the number of replacements. -func Replace(s, old, new []byte, n int) []byte { - m := 0 - if n != 0 { - // Compute number of replacements. - m = Count(s, old) - } - if m == 0 { - // Just return a copy. - return append([]byte(nil), s...) - } - if n < 0 || m < n { - n = m - } - - // Apply replacements to buffer. - t := make([]byte, len(s)+n*(len(new)-len(old))) - w := 0 - start := 0 - for i := 0; i < n; i++ { - j := start - if len(old) == 0 { - if i > 0 { - _, wid := utf8.DecodeRune(s[start:]) - j += wid - } - } else { - j += Index(s[start:], old) - } - w += copy(t[w:], s[start:j]) - w += copy(t[w:], new) - start = j + len(old) - } - w += copy(t[w:], s[start:]) - return t[0:w] -} - -// EqualFold reports whether s and t, interpreted as UTF-8 strings, -// are equal under Unicode case-folding. -func EqualFold(s, t []byte) bool { - for len(s) != 0 && len(t) != 0 { - // Extract first rune from each. - var sr, tr rune - if s[0] < utf8.RuneSelf { - sr, s = rune(s[0]), s[1:] - } else { - r, size := utf8.DecodeRune(s) - sr, s = r, s[size:] - } - if t[0] < utf8.RuneSelf { - tr, t = rune(t[0]), t[1:] - } else { - r, size := utf8.DecodeRune(t) - tr, t = r, t[size:] - } - - // If they match, keep going; if not, return false. - - // Easy case. - if tr == sr { - continue - } - - // Make sr < tr to simplify what follows. - if tr < sr { - tr, sr = sr, tr - } - // Fast check for ASCII. - if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { - // ASCII, and sr is upper case. tr must be lower case. - if tr == sr+'a'-'A' { - continue - } - return false - } - - // General case. SimpleFold(x) returns the next equivalent rune > x - // or wraps around to smaller values. - r := unicode.SimpleFold(sr) - for r != sr && r < tr { - r = unicode.SimpleFold(r) - } - if r == tr { - continue - } - return false - } - - // One string is empty. Are both? - return len(s) == len(t) -} |