diff options
Diffstat (limited to 'src/pkg/unicode/utf16')
-rw-r--r-- | src/pkg/unicode/utf16/export_test.go | 11 | ||||
-rw-r--r-- | src/pkg/unicode/utf16/utf16.go | 108 | ||||
-rw-r--r-- | src/pkg/unicode/utf16/utf16_test.go | 149 |
3 files changed, 0 insertions, 268 deletions
diff --git a/src/pkg/unicode/utf16/export_test.go b/src/pkg/unicode/utf16/export_test.go deleted file mode 100644 index 306247e48..000000000 --- a/src/pkg/unicode/utf16/export_test.go +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package utf16 - -// Extra names for constants so we can validate them during testing. -const ( - MaxRune = maxRune - ReplacementChar = replacementChar -) diff --git a/src/pkg/unicode/utf16/utf16.go b/src/pkg/unicode/utf16/utf16.go deleted file mode 100644 index c0e47c535..000000000 --- a/src/pkg/unicode/utf16/utf16.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package utf16 implements encoding and decoding of UTF-16 sequences. -package utf16 - -// The conditions replacementChar==unicode.ReplacementChar and -// maxRune==unicode.MaxRune are verified in the tests. -// Defining them locally avoids this package depending on package unicode. - -const ( - replacementChar = '\uFFFD' // Unicode replacement character - maxRune = '\U0010FFFF' // Maximum valid Unicode code point. -) - -const ( - // 0xd800-0xdc00 encodes the high 10 bits of a pair. - // 0xdc00-0xe000 encodes the low 10 bits of a pair. - // the value is those 20 bits plus 0x10000. - surr1 = 0xd800 - surr2 = 0xdc00 - surr3 = 0xe000 - - surrSelf = 0x10000 -) - -// IsSurrogate returns true if the specified Unicode code point -// can appear in a surrogate pair. -func IsSurrogate(r rune) bool { - return surr1 <= r && r < surr3 -} - -// DecodeRune returns the UTF-16 decoding of a surrogate pair. -// If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns -// the Unicode replacement code point U+FFFD. -func DecodeRune(r1, r2 rune) rune { - if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { - return (r1-surr1)<<10 | (r2 - surr2) + 0x10000 - } - return replacementChar -} - -// EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune. -// If the rune is not a valid Unicode code point or does not need encoding, -// EncodeRune returns U+FFFD, U+FFFD. -func EncodeRune(r rune) (r1, r2 rune) { - if r < surrSelf || r > maxRune || IsSurrogate(r) { - return replacementChar, replacementChar - } - r -= surrSelf - return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff -} - -// Encode returns the UTF-16 encoding of the Unicode code point sequence s. -func Encode(s []rune) []uint16 { - n := len(s) - for _, v := range s { - if v >= surrSelf { - n++ - } - } - - a := make([]uint16, n) - n = 0 - for _, v := range s { - switch { - case v < 0, surr1 <= v && v < surr3, v > maxRune: - v = replacementChar - fallthrough - case v < surrSelf: - a[n] = uint16(v) - n++ - default: - r1, r2 := EncodeRune(v) - a[n] = uint16(r1) - a[n+1] = uint16(r2) - n += 2 - } - } - return a[0:n] -} - -// Decode returns the Unicode code point sequence represented -// by the UTF-16 encoding s. -func Decode(s []uint16) []rune { - a := make([]rune, len(s)) - n := 0 - for i := 0; i < len(s); i++ { - switch r := s[i]; { - case surr1 <= r && r < surr2 && i+1 < len(s) && - surr2 <= s[i+1] && s[i+1] < surr3: - // valid surrogate sequence - a[n] = DecodeRune(rune(r), rune(s[i+1])) - i++ - n++ - case surr1 <= r && r < surr3: - // invalid surrogate sequence - a[n] = replacementChar - n++ - default: - // normal rune - a[n] = rune(r) - n++ - } - } - return a[0:n] -} diff --git a/src/pkg/unicode/utf16/utf16_test.go b/src/pkg/unicode/utf16/utf16_test.go deleted file mode 100644 index 3dca472bb..000000000 --- a/src/pkg/unicode/utf16/utf16_test.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package utf16_test - -import ( - "reflect" - "testing" - "unicode" - . "unicode/utf16" -) - -// Validate the constants redefined from unicode. -func TestConstants(t *testing.T) { - if MaxRune != unicode.MaxRune { - t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune) - } - if ReplacementChar != unicode.ReplacementChar { - t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar) - } -} - -type encodeTest struct { - in []rune - out []uint16 -} - -var encodeTests = []encodeTest{ - {[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}}, - {[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, - []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}}, - {[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1}, - []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}}, -} - -func TestEncode(t *testing.T) { - for _, tt := range encodeTests { - out := Encode(tt.in) - if !reflect.DeepEqual(out, tt.out) { - t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out) - } - } -} - -func TestEncodeRune(t *testing.T) { - for i, tt := range encodeTests { - j := 0 - for _, r := range tt.in { - r1, r2 := EncodeRune(r) - if r < 0x10000 || r > unicode.MaxRune { - if j >= len(tt.out) { - t.Errorf("#%d: ran out of tt.out", i) - break - } - if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { - t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2) - } - j++ - } else { - if j+1 >= len(tt.out) { - t.Errorf("#%d: ran out of tt.out", i) - break - } - if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) { - t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1]) - } - j += 2 - dec := DecodeRune(r1, r2) - if dec != r { - t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r) - } - } - } - if j != len(tt.out) { - t.Errorf("#%d: EncodeRune didn't generate enough output", i) - } - } -} - -type decodeTest struct { - in []uint16 - out []rune -} - -var decodeTests = []decodeTest{ - {[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}}, - {[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, - []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}}, - {[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}}, - {[]uint16{0xdfff}, []rune{0xfffd}}, -} - -func TestDecode(t *testing.T) { - for _, tt := range decodeTests { - out := Decode(tt.in) - if !reflect.DeepEqual(out, tt.out) { - t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out) - } - } -} - -var decodeRuneTests = []struct { - r1, r2 rune - want rune -}{ - {0xd800, 0xdc00, 0x10000}, - {0xd800, 0xdc01, 0x10001}, - {0xd808, 0xdf45, 0x12345}, - {0xdbff, 0xdfff, 0x10ffff}, - {0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted -} - -func TestDecodeRune(t *testing.T) { - for i, tt := range decodeRuneTests { - got := DecodeRune(tt.r1, tt.r2) - if got != tt.want { - t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want) - } - } -} - -var surrogateTests = []struct { - r rune - want bool -}{ - // from http://en.wikipedia.org/wiki/UTF-16 - {'\u007A', false}, // LATIN SMALL LETTER Z - {'\u6C34', false}, // CJK UNIFIED IDEOGRAPH-6C34 (water) - {'\uFEFF', false}, // Byte Order Mark - {'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point) - {'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF - {'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point) - - {rune(0xd7ff), false}, // surr1-1 - {rune(0xd800), true}, // surr1 - {rune(0xdc00), true}, // surr2 - {rune(0xe000), false}, // surr3 - {rune(0xdfff), true}, // surr3-1 -} - -func TestIsSurrogate(t *testing.T) { - for i, tt := range surrogateTests { - got := IsSurrogate(tt.r) - if got != tt.want { - t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want) - } - } -} |