diff options
Diffstat (limited to 'src/pkg/utf8/utf8.go')
-rw-r--r-- | src/pkg/utf8/utf8.go | 69 |
1 files changed, 68 insertions, 1 deletions
diff --git a/src/pkg/utf8/utf8.go b/src/pkg/utf8/utf8.go index 8e373e32d..455499e4d 100644 --- a/src/pkg/utf8/utf8.go +++ b/src/pkg/utf8/utf8.go @@ -209,6 +209,73 @@ func DecodeRuneInString(s string) (rune, size int) { return } +// DecodeLastRune unpacks the last UTF-8 encoding in p +// and returns the rune and its width in bytes. +func DecodeLastRune(p []byte) (rune, size int) { + end := len(p) + if end == 0 { + return RuneError, 0 + } + start := end - 1 + rune = int(p[start]) + if rune < RuneSelf { + return rune, 1 + } + // guard against O(n^2) behavior when traversing + // backwards through strings with long sequences of + // invalid UTF-8. + lim := end - UTFMax + if lim < 0 { + lim = 0 + } + for start--; start >= lim; start-- { + if RuneStart(p[start]) { + break + } + } + if start < 0 { + start = 0 + } + rune, size = DecodeRune(p[start:end]) + if start+size != end { + return RuneError, 1 + } + return rune, size +} + +// DecodeLastRuneInString is like DecodeLastRune but its input is a string. +func DecodeLastRuneInString(s string) (rune, size int) { + end := len(s) + if end == 0 { + return RuneError, 0 + } + start := end - 1 + rune = int(s[start]) + if rune < RuneSelf { + return rune, 1 + } + // guard against O(n^2) behavior when traversing + // backwards through strings with long sequences of + // invalid UTF-8. + lim := end - UTFMax + if lim < 0 { + lim = 0 + } + for start--; start >= lim; start-- { + if RuneStart(s[start]) { + break + } + } + if start < 0 { + start = 0 + } + rune, size = DecodeRuneInString(s[start:end]) + if start+size != end { + return RuneError, 1 + } + return rune, size +} + // RuneLen returns the number of bytes required to encode the rune. func RuneLen(rune int) int { switch { @@ -226,7 +293,7 @@ func RuneLen(rune int) int { // EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. // It returns the number of bytes written. -func EncodeRune(rune int, p []byte) int { +func EncodeRune(p []byte, rune int) int { // Negative values are erroneous. Making it unsigned addresses the problem. r := uint(rune) |