Change strings.Split, bytes.Split to take a maximum substring count argument.

R=rsc APPROVED=r DELTA=131 (39 added, 10 deleted, 82 changed) OCL=30669 CL=30723
author: David Symonds <dsymonds@golang.org> 2009-06-24 19:02:29 -0700
committer: David Symonds <dsymonds@golang.org> 2009-06-24 19:02:29 -0700
commit: e3fe6186634f9b74b9f39924516e24b068b9e141 (patch)
tree: c423ba821d41ea2d673a7420ecf7c082aa827863 /src/pkg/bytes/bytes.go
parent: c9af30256dfa40f26f2a60e88b68dadfbabdd2ba (diff)
download: golang-e3fe6186634f9b74b9f39924516e24b068b9e141.tar.gz
1 files changed, 27 insertions, 16 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go
index 17f82db7c..e5e8bffd8 100644
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -55,19 +55,27 @@ func Copy(dst, src []byte) int {
 	return len(src)
 }
 
-// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes).
-// Invalid UTF-8 sequences become correct encodings of U+FFF8.
-func Explode(s []byte) [][]byte {
-	a := make([][]byte, utf8.RuneCount(s));
+// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
+// up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes.
+func explode(s []byte, n int) [][]byte {
+	if n <= 0 {
+		n = len(s);
+	}
+	a := make([][]byte, n);
 	var size, rune int;
-	i := 0;
+	na := 0;
 	for len(s) > 0 {
+		if na+1 >= n {
+			a[na] = s;
+			na++;
+			break
+		}
 		rune, size = utf8.DecodeRune(s);
-		a[i] = s[0:size];
+		a[na] = s[0:size];
 		s = s[size:len(s)];
-		i++;
+		na++;
 	}
-	return a
+	return a[0:na]
 }
 
 // Count counts the number of non-overlapping instances of sep in s.
@@ -101,27 +109,30 @@ func Index(s, sep []byte) int {
 	return -1
 }
 
-// Split returns the array representing the subarrays of s separated by sep. Adjacent
-// occurrences of sep produce empty subarrays.  If sep is empty, it is the same as Explode.
-func Split(s, sep []byte) [][]byte {
+// Split splits the array s around each instance of sep, returning an array of subarrays of s.
+// If sep is empty, Split splits s after each UTF-8 sequence.
+// If n > 0, Split splits s into at most n subarrays; the last subarray will contain an unsplit remainder.
+func Split(s, sep []byte, n int) [][]byte {
 	if len(sep) == 0 {
-		return Explode(s)
+		return explode(s, n)
+	}
+	if n <= 0 {
+		n = Count(s, sep) + 1;
 	}
 	c := sep[0];
 	start := 0;
-	n := Count(s, sep)+1;
 	a := make([][]byte, n);
 	na := 0;
-	for i := 0; i+len(sep) <= len(s); i++ {
+	for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
 		if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
 			a[na] = s[start:i];
 			na++;
 			start = i+len(sep);
-			i += len(sep)-1
+			i += len(sep)-1;
 		}
 	}
 	a[na] = s[start:len(s)];
-	return a
+	return a[0:na+1]
 }
 
 // Join concatenates the elements of a to create a single byte array.   The separator
author	David Symonds <dsymonds@golang.org>	2009-06-24 19:02:29 -0700
committer	David Symonds <dsymonds@golang.org>	2009-06-24 19:02:29 -0700
commit	e3fe6186634f9b74b9f39924516e24b068b9e141 (patch)
tree	c423ba821d41ea2d673a7420ecf7c082aa827863 /src/pkg/bytes/bytes.go
parent	c9af30256dfa40f26f2a60e88b68dadfbabdd2ba (diff)
download	golang-e3fe6186634f9b74b9f39924516e24b068b9e141.tar.gz