summaryrefslogtreecommitdiff
path: root/src/pkg/bytes/bytes.go
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2009-09-01 13:46:59 -0700
committerRob Pike <r@golang.org>2009-09-01 13:46:59 -0700
commitd504196b95d2f87f4796b32148579955a3f2caf2 (patch)
treed4b6e571edb2c10b0dbe9341187850fa899a7699 /src/pkg/bytes/bytes.go
parenta9fec6c6215fb66ec448c996278026436639baca (diff)
downloadgolang-d504196b95d2f87f4796b32148579955a3f2caf2.tar.gz
casing operations for byte arrays
R=rsc DELTA=186 (181 added, 0 deleted, 5 changed) OCL=34203 CL=34203
Diffstat (limited to 'src/pkg/bytes/bytes.go')
-rw-r--r--src/pkg/bytes/bytes.go85
1 files changed, 84 insertions, 1 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go
index e5e8bffd8..5375fecaa 100644
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -6,7 +6,10 @@
// Analagous to the facilities of the strings package.
package bytes
-import "utf8"
+import (
+ "unicode";
+ "utf8";
+)
// Compare returns an integer comparing the two byte arrays lexicographically.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b
@@ -177,3 +180,83 @@ func HasPrefix(s, prefix []byte) bool {
func HasSuffix(s, suffix []byte) bool {
return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):len(s)], suffix)
}
+
+// Map returns a copy of the byte array s with all its characters modified
+// according to the mapping function. Each character of s, decoded as UTF-8,
+// is passed to mapping exactly once and the result is encoded into the copy.
+func Map(mapping func(rune int) int, s []byte) []byte {
+	// In the worst case, the array can grow when mapped, making
+	// things unpleasant.  But it's so rare we barge in assuming it's
+	// fine.  It could also shrink but that falls out naturally.
+	maxbytes := len(s);	// length of b
+	nbytes := 0;	// number of bytes encoded in b
+	b := make([]byte, maxbytes);
+	for wid, i := 0, 0; i < len(s); i += wid {
+		wid = 1;
+		rune := int(s[i]);
+		if rune >= utf8.RuneSelf {
+			// Multi-byte sequence: decode it to learn the rune and its width.
+			rune, wid = utf8.DecodeRune(s[i:len(s)]);
+		}
+		// Apply the mapping once, whether the character was ASCII or not.
+		rune = mapping(rune);
+		if nbytes + utf8.RuneLen(rune) > maxbytes {
+			// Grow the buffer.
+			maxbytes = maxbytes*2 + utf8.UTFMax;
+			nb := make([]byte, maxbytes);
+			for i, c := range b[0:nbytes] {
+				nb[i] = c
+			}
+			b = nb;
+		}
+		nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
+	}
+	return b[0:nbytes];
+}
+
+// ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case, by applying unicode.ToUpper to each character through Map.
+func ToUpper(s []byte) []byte {
+ return Map(unicode.ToUpper, s)
+}
+
+// ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case.
+func ToLower(s []byte) []byte {
+ return Map(unicode.ToLower, s)
+}
+
+// Title returns a copy of the byte array s with all Unicode letters mapped to their title case.
+func Title(s []byte) []byte {
+ return Map(unicode.ToTitle, s)
+}
+
+// TrimSpace returns a slice of the byte array s, with all leading and trailing
+// white space removed, as defined by Unicode. The result is a sub-slice of s;
+// no copy is made.
+func TrimSpace(s []byte) []byte {
+	start, end := 0, len(s);
+	// Advance start past leading white space, one UTF-8 character at a time.
+	for wid := 0; start < end; start += wid {
+		wid = 1;
+		rune := int(s[start]);
+		if rune >= utf8.RuneSelf {
+			rune, wid = utf8.DecodeRune(s[start:end])
+		}
+		if !unicode.IsSpace(rune) {
+			break;
+		}
+	}
+	// Retreat end past trailing white space.
+	for wid := 0; start < end; end -= wid {
+		wid = 1;
+		rune := int(s[end-1]);
+		if rune >= utf8.RuneSelf {
+			// Back up carefully looking for beginning of rune. Mustn't pass start.
+			// The search begins at the final byte itself, which may be a stray
+			// lead byte rather than the tail of a well-formed sequence.
+			back := 1;
+			for ; start <= end-back && !utf8.RuneStart(s[end-back]); back++ {
+			}
+			if start > end-back {	// invalid UTF-8 sequence; stop processing
+				return s[start:end]
+			}
+			rune, wid = utf8.DecodeRune(s[end-back:end]);
+			if wid != back {
+				// The trailing bytes do not form exactly one character, so
+				// they are not white space; stop rather than step past them.
+				return s[start:end]
+			}
+		}
+		if !unicode.IsSpace(rune) {
+			break;
+		}
+	}
+	return s[start:end];
+}