diff options
Diffstat (limited to 'src/pkg/utf8')
| -rw-r--r-- | src/pkg/utf8/utf8.go | 150 | ||||
| -rw-r--r-- | src/pkg/utf8/utf8_test.go | 90 |
2 files changed, 120 insertions, 120 deletions
diff --git a/src/pkg/utf8/utf8.go b/src/pkg/utf8/utf8.go index ad78f599c..8e373e32d 100644 --- a/src/pkg/utf8/utf8.go +++ b/src/pkg/utf8/utf8.go @@ -6,40 +6,40 @@ // This package calls a Unicode character a rune for brevity. package utf8 -import "unicode" // only needed for a couple of constants +import "unicode" // only needed for a couple of constants // Numbers fundamental to the encoding. const ( - RuneError = unicode.ReplacementChar; // the "error" Rune or "replacement character". - RuneSelf = 0x80; // characters below Runeself are represented as themselves in a single byte. - UTFMax = 4; // maximum number of bytes of a UTF-8 encoded Unicode character. + RuneError = unicode.ReplacementChar // the "error" Rune or "replacement character". + RuneSelf = 0x80 // characters below Runeself are represented as themselves in a single byte. + UTFMax = 4 // maximum number of bytes of a UTF-8 encoded Unicode character. ) const ( - _T1 = 0x00; // 0000 0000 - _Tx = 0x80; // 1000 0000 - _T2 = 0xC0; // 1100 0000 - _T3 = 0xE0; // 1110 0000 - _T4 = 0xF0; // 1111 0000 - _T5 = 0xF8; // 1111 1000 - - _Maskx = 0x3F; // 0011 1111 - _Mask2 = 0x1F; // 0001 1111 - _Mask3 = 0x0F; // 0000 1111 - _Mask4 = 0x07; // 0000 0111 - - _Rune1Max = 1<<7 - 1; - _Rune2Max = 1<<11 - 1; - _Rune3Max = 1<<16 - 1; - _Rune4Max = 1<<21 - 1; + _T1 = 0x00 // 0000 0000 + _Tx = 0x80 // 1000 0000 + _T2 = 0xC0 // 1100 0000 + _T3 = 0xE0 // 1110 0000 + _T4 = 0xF0 // 1111 0000 + _T5 = 0xF8 // 1111 1000 + + _Maskx = 0x3F // 0011 1111 + _Mask2 = 0x1F // 0001 1111 + _Mask3 = 0x0F // 0000 1111 + _Mask4 = 0x07 // 0000 0111 + + _Rune1Max = 1<<7 - 1 + _Rune2Max = 1<<11 - 1 + _Rune3Max = 1<<16 - 1 + _Rune4Max = 1<<21 - 1 ) func decodeRuneInternal(p []byte) (rune, size int, short bool) { - n := len(p); + n := len(p) if n < 1 { return RuneError, 0, true } - c0 := p[0]; + c0 := p[0] // 1-byte, 7-bit sequence? if c0 < _Tx { @@ -55,66 +55,66 @@ func decodeRuneInternal(p []byte) (rune, size int, short bool) { if n < 2 { return RuneError, 1, true } - c1 := p[1]; + c1 := p[1] if c1 < _Tx || _T2 <= c1 { return RuneError, 1, false } // 2-byte, 11-bit sequence? if c0 < _T3 { - rune = int(c0&_Mask2)<<6 | int(c1&_Maskx); + rune = int(c0&_Mask2)<<6 | int(c1&_Maskx) if rune <= _Rune1Max { return RuneError, 1, false } - return rune, 2, false; + return rune, 2, false } // need second continuation byte if n < 3 { return RuneError, 1, true } - c2 := p[2]; + c2 := p[2] if c2 < _Tx || _T2 <= c2 { return RuneError, 1, false } // 3-byte, 16-bit sequence? if c0 < _T4 { - rune = int(c0&_Mask3)<<12 | int(c1&_Maskx)<<6 | int(c2&_Maskx); + rune = int(c0&_Mask3)<<12 | int(c1&_Maskx)<<6 | int(c2&_Maskx) if rune <= _Rune2Max { return RuneError, 1, false } - return rune, 3, false; + return rune, 3, false } // need third continuation byte if n < 4 { return RuneError, 1, true } - c3 := p[3]; + c3 := p[3] if c3 < _Tx || _T2 <= c3 { return RuneError, 1, false } // 4-byte, 21-bit sequence? if c0 < _T5 { - rune = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx); + rune = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx) if rune <= _Rune3Max { return RuneError, 1, false } - return rune, 4, false; + return rune, 4, false } // error - return RuneError, 1, false; + return RuneError, 1, false } func decodeRuneInStringInternal(s string) (rune, size int, short bool) { - n := len(s); + n := len(s) if n < 1 { return RuneError, 0, true } - c0 := s[0]; + c0 := s[0] // 1-byte, 7-bit sequence? if c0 < _Tx { @@ -130,83 +130,83 @@ func decodeRuneInStringInternal(s string) (rune, size int, short bool) { if n < 2 { return RuneError, 1, true } - c1 := s[1]; + c1 := s[1] if c1 < _Tx || _T2 <= c1 { return RuneError, 1, false } // 2-byte, 11-bit sequence? if c0 < _T3 { - rune = int(c0&_Mask2)<<6 | int(c1&_Maskx); + rune = int(c0&_Mask2)<<6 | int(c1&_Maskx) if rune <= _Rune1Max { return RuneError, 1, false } - return rune, 2, false; + return rune, 2, false } // need second continuation byte if n < 3 { return RuneError, 1, true } - c2 := s[2]; + c2 := s[2] if c2 < _Tx || _T2 <= c2 { return RuneError, 1, false } // 3-byte, 16-bit sequence? if c0 < _T4 { - rune = int(c0&_Mask3)<<12 | int(c1&_Maskx)<<6 | int(c2&_Maskx); + rune = int(c0&_Mask3)<<12 | int(c1&_Maskx)<<6 | int(c2&_Maskx) if rune <= _Rune2Max { return RuneError, 1, false } - return rune, 3, false; + return rune, 3, false } // need third continuation byte if n < 4 { return RuneError, 1, true } - c3 := s[3]; + c3 := s[3] if c3 < _Tx || _T2 <= c3 { return RuneError, 1, false } // 4-byte, 21-bit sequence? if c0 < _T5 { - rune = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx); + rune = int(c0&_Mask4)<<18 | int(c1&_Maskx)<<12 | int(c2&_Maskx)<<6 | int(c3&_Maskx) if rune <= _Rune3Max { return RuneError, 1, false } - return rune, 4, false; + return rune, 4, false } // error - return RuneError, 1, false; + return RuneError, 1, false } // FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. // An invalid encoding is considered a full Rune since it will convert as a width-1 error rune. func FullRune(p []byte) bool { - _, _, short := decodeRuneInternal(p); - return !short; + _, _, short := decodeRuneInternal(p) + return !short } // FullRuneInString is like FullRune but its input is a string. func FullRuneInString(s string) bool { - _, _, short := decodeRuneInStringInternal(s); - return !short; + _, _, short := decodeRuneInStringInternal(s) + return !short } // DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. func DecodeRune(p []byte) (rune, size int) { - rune, size, _ = decodeRuneInternal(p); - return; + rune, size, _ = decodeRuneInternal(p) + return } // DecodeRuneInString is like DecodeRune but its input is a string. func DecodeRuneInString(s string) (rune, size int) { - rune, size, _ = decodeRuneInStringInternal(s); - return; + rune, size, _ = decodeRuneInStringInternal(s) + return } // RuneLen returns the number of bytes required to encode the rune. @@ -221,24 +221,24 @@ func RuneLen(rune int) int { case rune <= _Rune4Max: return 4 } - return -1; + return -1 } // EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. // It returns the number of bytes written. func EncodeRune(rune int, p []byte) int { // Negative values are erroneous. Making it unsigned addresses the problem. - r := uint(rune); + r := uint(rune) if r <= _Rune1Max { - p[0] = byte(r); - return 1; + p[0] = byte(r) + return 1 } if r <= _Rune2Max { - p[0] = _T2 | byte(r>>6); - p[1] = _Tx | byte(r)&_Maskx; - return 2; + p[0] = _T2 | byte(r>>6) + p[1] = _Tx | byte(r)&_Maskx + return 2 } if r > unicode.MaxRune { @@ -246,33 +246,33 @@ func EncodeRune(rune int, p []byte) int { } if r <= _Rune3Max { - p[0] = _T3 | byte(r>>12); - p[1] = _Tx | byte(r>>6)&_Maskx; - p[2] = _Tx | byte(r)&_Maskx; - return 3; + p[0] = _T3 | byte(r>>12) + p[1] = _Tx | byte(r>>6)&_Maskx + p[2] = _Tx | byte(r)&_Maskx + return 3 } - p[0] = _T4 | byte(r>>18); - p[1] = _Tx | byte(r>>12)&_Maskx; - p[2] = _Tx | byte(r>>6)&_Maskx; - p[3] = _Tx | byte(r)&_Maskx; - return 4; + p[0] = _T4 | byte(r>>18) + p[1] = _Tx | byte(r>>12)&_Maskx + p[2] = _Tx | byte(r>>6)&_Maskx + p[3] = _Tx | byte(r)&_Maskx + return 4 } // RuneCount returns the number of runes in p. Erroneous and short // encodings are treated as single runes of width 1 byte. func RuneCount(p []byte) int { - i := 0; - var n int; + i := 0 + var n int for n = 0; i < len(p); n++ { if p[i] < RuneSelf { i++ } else { - _, size := DecodeRune(p[i:]); - i += size; + _, size := DecodeRune(p[i:]) + i += size } } - return n; + return n } // RuneCountInString is like RuneCount but its input is a string. @@ -280,10 +280,10 @@ func RuneCountInString(s string) (n int) { for _ = range s { n++ } - return; + return } // RuneStart reports whether the byte could be the first byte of // an encoded rune. Second and subsequent bytes always have the top // two bits set to 10. -func RuneStart(b byte) bool { return b&0xC0 != 0x80 } +func RuneStart(b byte) bool { return b&0xC0 != 0x80 } diff --git a/src/pkg/utf8/utf8_test.go b/src/pkg/utf8/utf8_test.go index 595efc634..68bfa6a77 100644 --- a/src/pkg/utf8/utf8_test.go +++ b/src/pkg/utf8/utf8_test.go @@ -5,15 +5,15 @@ package utf8_test import ( - "bytes"; - "strings"; - "testing"; - . "utf8"; + "bytes" + "strings" + "testing" + . "utf8" ) type Utf8Map struct { - rune int; - str string; + rune int + str string } var utf8map = []Utf8Map{ @@ -47,27 +47,27 @@ var utf8map = []Utf8Map{ // strings.Bytes with one extra byte at end func makeBytes(s string) []byte { - s += "\x00"; - b := strings.Bytes(s); - return b[0 : len(s)-1]; + s += "\x00" + b := strings.Bytes(s) + return b[0 : len(s)-1] } func TestFullRune(t *testing.T) { for i := 0; i < len(utf8map); i++ { - m := utf8map[i]; - b := makeBytes(m.str); + m := utf8map[i] + b := makeBytes(m.str) if !FullRune(b) { t.Errorf("FullRune(%q) (rune %04x) = false, want true", b, m.rune) } - s := m.str; + s := m.str if !FullRuneInString(s) { t.Errorf("FullRuneInString(%q) (rune %04x) = false, want true", s, m.rune) } - b1 := b[0 : len(b)-1]; + b1 := b[0 : len(b)-1] if FullRune(b1) { t.Errorf("FullRune(%q) = true, want false", b1) } - s1 := string(b1); + s1 := string(b1) if FullRuneInString(s1) { t.Errorf("FullRune(%q) = true, want false", s1) } @@ -76,11 +76,11 @@ func TestFullRune(t *testing.T) { func TestEncodeRune(t *testing.T) { for i := 0; i < len(utf8map); i++ { - m := utf8map[i]; - b := makeBytes(m.str); - var buf [10]byte; - n := EncodeRune(m.rune, &buf); - b1 := buf[0:n]; + m := utf8map[i] + b := makeBytes(m.str) + var buf [10]byte + n := EncodeRune(m.rune, &buf) + b1 := buf[0:n] if !bytes.Equal(b, b1) { t.Errorf("EncodeRune(%#04x) = %q want %q", m.rune, b1, b) } @@ -89,40 +89,40 @@ func TestEncodeRune(t *testing.T) { func TestDecodeRune(t *testing.T) { for i := 0; i < len(utf8map); i++ { - m := utf8map[i]; - b := makeBytes(m.str); - rune, size := DecodeRune(b); + m := utf8map[i] + b := makeBytes(m.str) + rune, size := DecodeRune(b) if rune != m.rune || size != len(b) { t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b)) } - s := m.str; - rune, size = DecodeRuneInString(s); + s := m.str + rune, size = DecodeRuneInString(s) if rune != m.rune || size != len(b) { t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b)) } // there's an extra byte that bytes left behind - make sure trailing byte works - rune, size = DecodeRune(b[0:cap(b)]); + rune, size = DecodeRune(b[0:cap(b)]) if rune != m.rune || size != len(b) { t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b)) } - s = m.str + "\x00"; - rune, size = DecodeRuneInString(s); + s = m.str + "\x00" + rune, size = DecodeRuneInString(s) if rune != m.rune || size != len(b) { t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b)) } // make sure missing bytes fail - wantsize := 1; + wantsize := 1 if wantsize >= len(b) { wantsize = 0 } - rune, size = DecodeRune(b[0 : len(b)-1]); + rune, size = DecodeRune(b[0 : len(b)-1]) if rune != RuneError || size != wantsize { t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize) } - s = m.str[0 : len(m.str)-1]; - rune, size = DecodeRuneInString(s); + s = m.str[0 : len(m.str)-1] + rune, size = DecodeRuneInString(s) if rune != RuneError || size != wantsize { t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, wantsize) } @@ -133,12 +133,12 @@ func TestDecodeRune(t *testing.T) { } else { b[len(b)-1] = 0x7F } - rune, size = DecodeRune(b); + rune, size = DecodeRune(b) if rune != RuneError || size != 1 { t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, RuneError, 1) } - s = string(b); - rune, size = DecodeRune(b); + s = string(b) + rune, size = DecodeRune(b) if rune != RuneError || size != 1 { t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, 1) } @@ -147,18 +147,18 @@ func TestDecodeRune(t *testing.T) { // Check that negative runes encode as U+FFFD. func TestNegativeRune(t *testing.T) { - errorbuf := make([]byte, UTFMax); - errorbuf = errorbuf[0:EncodeRune(RuneError, errorbuf)]; - buf := make([]byte, UTFMax); - buf = buf[0:EncodeRune(-1, buf)]; + errorbuf := make([]byte, UTFMax) + errorbuf = errorbuf[0:EncodeRune(RuneError, errorbuf)] + buf := make([]byte, UTFMax) + buf = buf[0:EncodeRune(-1, buf)] if !bytes.Equal(buf, errorbuf) { t.Errorf("incorrect encoding [% x] for -1; expected [% x]", buf, errorbuf) } } type RuneCountTest struct { - in string; - out int; + in string + out int } var runecounttests = []RuneCountTest{ @@ -170,7 +170,7 @@ var runecounttests = []RuneCountTest{ func TestRuneCount(t *testing.T) { for i := 0; i < len(runecounttests); i++ { - tt := runecounttests[i]; + tt := runecounttests[i] if out := RuneCountInString(tt.in); out != tt.out { t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out) } @@ -193,28 +193,28 @@ func BenchmarkRuneCountTenJapaneseChars(b *testing.B) { } func BenchmarkEncodeASCIIRune(b *testing.B) { - buf := make([]byte, UTFMax); + buf := make([]byte, UTFMax) for i := 0; i < b.N; i++ { EncodeRune('a', buf) } } func BenchmarkEncodeJapaneseRune(b *testing.B) { - buf := make([]byte, UTFMax); + buf := make([]byte, UTFMax) for i := 0; i < b.N; i++ { EncodeRune('本', buf) } } func BenchmarkDecodeASCIIRune(b *testing.B) { - a := []byte{'a'}; + a := []byte{'a'} for i := 0; i < b.N; i++ { DecodeRune(a) } } func BenchmarkDecodeJapaneseRune(b *testing.B) { - nihon := strings.Bytes("本"); + nihon := strings.Bytes("本") for i := 0; i < b.N; i++ { DecodeRune(nihon) } |
