diff options
author | David Symonds <dsymonds@golang.org> | 2009-06-24 19:02:29 -0700 |
---|---|---|
committer | David Symonds <dsymonds@golang.org> | 2009-06-24 19:02:29 -0700 |
commit | e3fe6186634f9b74b9f39924516e24b068b9e141 (patch) | |
tree | c423ba821d41ea2d673a7420ecf7c082aa827863 /src/pkg/bytes/bytes.go | |
parent | c9af30256dfa40f26f2a60e88b68dadfbabdd2ba (diff) | |
download | golang-e3fe6186634f9b74b9f39924516e24b068b9e141.tar.gz |
Change strings.Split, bytes.Split to take a maximum substring count argument.
R=rsc
APPROVED=r
DELTA=131 (39 added, 10 deleted, 82 changed)
OCL=30669
CL=30723
Diffstat (limited to 'src/pkg/bytes/bytes.go')
-rw-r--r-- | src/pkg/bytes/bytes.go | 43 |
1 files changed, 27 insertions, 16 deletions
```diff
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go
index 17f82db7c..e5e8bffd8 100644
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -55,19 +55,27 @@ func Copy(dst, src []byte) int {
 	return len(src)
 }
 
-// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes).
-// Invalid UTF-8 sequences become correct encodings of U+FFF8.
-func Explode(s []byte) [][]byte {
-	a := make([][]byte, utf8.RuneCount(s));
+// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
+// up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes.
+func explode(s []byte, n int) [][]byte {
+	if n <= 0 {
+		n = len(s);
+	}
+	a := make([][]byte, n);
 	var size, rune int;
-	i := 0;
+	na := 0;
 	for len(s) > 0 {
+		if na+1 >= n {
+			a[na] = s;
+			na++;
+			break
+		}
 		rune, size = utf8.DecodeRune(s);
-		a[i] = s[0:size];
+		a[na] = s[0:size];
 		s = s[size:len(s)];
-		i++;
+		na++;
 	}
-	return a
+	return a[0:na]
 }
 
 // Count counts the number of non-overlapping instances of sep in s.
@@ -101,27 +109,30 @@ func Index(s, sep []byte) int {
 	return -1
 }
 
-// Split returns the array representing the subarrays of s separated by sep. Adjacent
-// occurrences of sep produce empty subarrays. If sep is empty, it is the same as Explode.
-func Split(s, sep []byte) [][]byte {
+// Split splits the array s around each instance of sep, returning an array of subarrays of s.
+// If sep is empty, Split splits s after each UTF-8 sequence.
+// If n > 0, split Splits s into at most n subarrays; the last subarray will contain an unsplit remainder.
+func Split(s, sep []byte, n int) [][]byte {
 	if len(sep) == 0 {
-		return Explode(s)
+		return explode(s, n)
+	}
+	if n <= 0 {
+		n = Count(s, sep) + 1;
 	}
 	c := sep[0];
 	start := 0;
-	n := Count(s, sep)+1;
 	a := make([][]byte, n);
 	na := 0;
-	for i := 0; i+len(sep) <= len(s); i++ {
+	for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
 		if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
 			a[na] = s[start:i];
 			na++;
 			start = i+len(sep);
-			i += len(sep)-1
+			i += len(sep)-1;
 		}
 	}
 	a[na] = s[start:len(s)];
-	return a
+	return a[0:na+1]
 }
 
 // Join concatenates the elements of a to create a single byte array. The separator
```