summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Froehlich <peter.hans.froehlich@gmail.com>2009-12-02 20:47:38 -0800
committerPeter Froehlich <peter.hans.froehlich@gmail.com>2009-12-02 20:47:38 -0800
commit0247d93d819700b0bfc6814dc7b1c2afec4648a0 (patch)
tree34efe677ee0f64db58643d216d064b70bc35e016
parentb7bc89750bfcae2432653c3cd79ad5570b9c456d (diff)
downloadgolang-0247d93d819700b0bfc6814dc7b1c2afec4648a0.tar.gz
Runes: turn string into []int
Split: fixed typo in documentation R=rsc, r, r1 http://codereview.appspot.com/157170 Committer: Russ Cox <rsc@golang.org>
-rw-r--r--src/pkg/bytes/bytes.go13
-rw-r--r--src/pkg/bytes/bytes_test.go46
-rw-r--r--src/pkg/strings/strings.go13
-rw-r--r--src/pkg/strings/strings_test.go45
4 files changed, 116 insertions, 1 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go
index 8548b1549..91ecdf947 100644
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -333,3 +333,16 @@ func AddByte(s []byte, t byte) []byte {
s[lens] = t;
return s;
}
+
+// Runes returns a slice of runes (Unicode code points) equivalent to s.
+func Runes(s []byte) []int {
+ t := make([]int, utf8.RuneCount(s));
+ i := 0;
+ for len(s) > 0 {
+ r, l := utf8.DecodeRune(s);
+ t[i] = r;
+ i++;
+ s = s[l:];
+ }
+ return t;
+}
diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go
index b7f826293..a4f4adcfe 100644
--- a/src/pkg/bytes/bytes_test.go
+++ b/src/pkg/bytes/bytes_test.go
@@ -400,3 +400,49 @@ func TestRepeat(t *testing.T) {
}
}
}
+
+func runesEqual(a, b []int) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i, r := range a {
+ if r != b[i] {
+ return false
+ }
+ }
+ return true;
+}
+
+type RunesTest struct {
+ in string;
+ out []int;
+ lossy bool;
+}
+
+var RunesTests = []RunesTest{
+ RunesTest{"", []int{}, false},
+ RunesTest{" ", []int{32}, false},
+ RunesTest{"ABC", []int{65, 66, 67}, false},
+ RunesTest{"abc", []int{97, 98, 99}, false},
+ RunesTest{"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false},
+ RunesTest{"ab\x80c", []int{97, 98, 0xFFFD, 99}, true},
+ RunesTest{"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true},
+}
+
+func TestRunes(t *testing.T) {
+ for _, tt := range RunesTests {
+ tin := strings.Bytes(tt.in);
+ a := Runes(tin);
+ if !runesEqual(a, tt.out) {
+ t.Errorf("Runes(%q) = %v; want %v", tin, a, tt.out);
+ continue;
+ }
+ if !tt.lossy {
+ // can only test reassembly if we didn't lose information
+ s := string(a);
+ if s != tt.in {
+ t.Errorf("string(Runes(%q)) = %x; want %x", tin, s, tin)
+ }
+ }
+ }
+}
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
index 013af680a..7be98e6c1 100644
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -124,7 +124,7 @@ func genSplit(s, sep string, sepSave, n int) []string {
// Split splits the string s around each instance of sep, returning an array of substrings of s.
// If sep is empty, Split splits s after each UTF-8 sequence.
-// If n > 0, split Splits s into at most n substrings; the last substring will be the unsplit remainder.
+// If n > 0, Split splits s into at most n substrings; the last substring will be the unsplit remainder.
func Split(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
// SplitAfter splits the string s after each instance of sep, returning an array of substrings of s.
@@ -272,3 +272,14 @@ func Bytes(s string) []byte {
}
return b;
}
+
+// Runes returns a slice of runes (Unicode code points) equivalent to the string s.
+func Runes(s string) []int {
+ t := make([]int, utf8.RuneCountInString(s));
+ i := 0;
+ for _, r := range s {
+ t[i] = r;
+ i++;
+ }
+ return t;
+}
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
index 1171db224..ce77c5c2f 100644
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -370,3 +370,48 @@ func TestRepeat(t *testing.T) {
}
}
}
+
+func runesEqual(a, b []int) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i, r := range a {
+ if r != b[i] {
+ return false
+ }
+ }
+ return true;
+}
+
+type RunesTest struct {
+ in string;
+ out []int;
+ lossy bool;
+}
+
+var RunesTests = []RunesTest{
+ RunesTest{"", []int{}, false},
+ RunesTest{" ", []int{32}, false},
+ RunesTest{"ABC", []int{65, 66, 67}, false},
+ RunesTest{"abc", []int{97, 98, 99}, false},
+ RunesTest{"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false},
+ RunesTest{"ab\x80c", []int{97, 98, 0xFFFD, 99}, true},
+ RunesTest{"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true},
+}
+
+func TestRunes(t *testing.T) {
+ for _, tt := range RunesTests {
+ a := Runes(tt.in);
+ if !runesEqual(a, tt.out) {
+ t.Errorf("Runes(%q) = %v; want %v", tt.in, a, tt.out);
+ continue;
+ }
+ if !tt.lossy {
+ // can only test reassembly if we didn't lose information
+ s := string(a);
+ if s != tt.in {
+ t.Errorf("string(Runes(%q)) = %x; want %x", tt.in, s, tt.in)
+ }
+ }
+ }
+}