diff options
Diffstat (limited to 'src/pkg/strconv')
-rw-r--r-- | src/pkg/strconv/isprint.go | 521 | ||||
-rw-r--r-- | src/pkg/strconv/makeisprint.go | 162 | ||||
-rw-r--r-- | src/pkg/strconv/quote.go | 181 | ||||
-rw-r--r-- | src/pkg/strconv/quote_test.go | 15 |
4 files changed, 834 insertions, 45 deletions
diff --git a/src/pkg/strconv/isprint.go b/src/pkg/strconv/isprint.go new file mode 100644 index 000000000..a03a07bfb --- /dev/null +++ b/src/pkg/strconv/isprint.go @@ -0,0 +1,521 @@ +// DO NOT EDIT. GENERATED BY +// go run makeisprint.go >x && mv x isprint.go + +package strconv + +// (474+134+42)*2 + (180)*4 = 2020 bytes + +var isPrint16 = []uint16{ + 0x0020, 0x007e, + 0x00a1, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x0527, + 0x0531, 0x0556, + 0x0559, 0x058a, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0606, 0x061b, + 0x061e, 0x070d, + 0x0710, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07fa, + 0x0800, 0x082d, + 0x0830, 0x085b, + 0x085e, 0x085e, + 0x0900, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09e3, + 0x09e6, 0x09fb, + 0x0a01, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a39, + 0x0a3c, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0ab9, + 0x0abc, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0af1, + 0x0b01, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b63, + 0x0b66, 0x0b77, + 0x0b82, 0x0b8a, + 0x0b8e, 0x0b95, + 0x0b99, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c39, + 0x0c3d, 0x0c4d, + 0x0c55, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0cb9, + 0x0cbc, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0ce3, + 0x0ce6, 0x0cf2, + 0x0d02, 0x0d3a, + 0x0d3d, 0x0d4e, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d96, + 0x0d9a, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e84, + 0x0e87, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0ea7, + 0x0eaa, 0x0ebd, + 0x0ec0, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f6c, + 0x0f71, 0x0fda, + 0x1000, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x124d, + 0x1250, 0x125d, + 0x1260, 0x128d, + 0x1290, 0x12b5, + 0x12b8, 0x12c5, + 0x12c8, 0x1315, + 0x1318, 0x135a, + 0x135d, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1bc0, 0x1bf3, + 0x1bfc, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd0, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfc, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f7d, + 0x1f80, 0x1fd3, + 0x1fd6, 0x1fef, + 0x1ff2, 0x1ffe, + 0x2010, 0x2027, + 0x2030, 0x205e, + 0x2070, 0x2071, + 0x2074, 0x209c, + 0x20a0, 0x20b9, + 0x20d0, 0x20f0, + 0x2100, 0x2189, + 0x2190, 0x23f3, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2cf1, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d70, + 0x2d7f, 0x2d96, + 0x2da0, 0x2e31, + 0x2e80, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x31ba, + 0x31c0, 0x31e3, + 0x31f0, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa4d0, 0xa62b, + 0xa640, 0xa673, + 0xa67c, 0xa697, + 0xa6a0, 0xa6f7, + 0xa700, 0xa791, + 0xa7a0, 0xa7a9, + 0xa7fa, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, + 0xa8e0, 0xa8fb, + 0xa900, 0xa953, + 0xa95f, 0xa97c, + 0xa980, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab2e, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfbc1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe26, + 0xfe30, 0xfe6b, + 0xfe70, 0xfefc, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffee, + 0xfffc, 0xfffd, +} + +var isNotPrint16 = []uint16{ + 0x00ad, + 0x038b, + 0x038d, + 0x03a2, + 0x0560, + 0x0588, + 0x06dd, + 0x083f, + 0x0978, + 0x0980, + 0x0984, + 0x09a9, + 0x09b1, + 0x09de, + 0x0a04, + 0x0a29, + 0x0a31, + 0x0a34, + 0x0a37, + 0x0a3d, + 0x0a5d, + 0x0a84, + 0x0a8e, + 0x0a92, + 0x0aa9, + 0x0ab1, + 0x0ab4, + 0x0ac6, + 0x0aca, + 0x0af0, + 0x0b04, + 0x0b29, + 0x0b31, + 0x0b34, + 0x0b5e, + 0x0b84, + 0x0b91, + 0x0b9b, + 0x0b9d, + 0x0bc9, + 0x0c04, + 0x0c0d, + 0x0c11, + 0x0c29, + 0x0c34, + 0x0c45, + 0x0c49, + 0x0c57, + 0x0c84, + 0x0c8d, + 0x0c91, + 0x0ca9, + 0x0cb4, + 0x0cc5, + 0x0cc9, + 0x0cdf, + 0x0cf0, + 0x0d04, + 0x0d0d, + 0x0d11, + 0x0d45, + 0x0d49, + 0x0d84, + 0x0db2, + 0x0dbc, + 0x0dd5, + 0x0dd7, + 0x0e83, + 0x0e89, + 0x0e98, + 0x0ea0, + 0x0ea4, + 0x0ea6, + 0x0eac, + 0x0eba, + 0x0ec5, + 0x0ec7, + 0x0f48, + 0x0f98, + 0x0fbd, + 0x0fcd, + 0x1249, + 0x1257, + 0x1259, + 0x1289, + 0x12b1, + 0x12bf, + 0x12c1, + 0x12d7, + 0x1311, + 0x1680, + 0x170d, + 0x176d, + 0x1771, + 0x1a5f, + 0x1f58, + 0x1f5a, + 0x1f5c, + 0x1f5e, + 0x1fb5, + 0x1fc5, + 0x1fdc, + 0x1ff5, + 0x208f, + 0x2700, + 0x27cb, + 0x27cd, + 0x2c2f, + 0x2c5f, + 0x2da7, + 0x2daf, + 0x2db7, + 0x2dbf, + 0x2dc7, + 0x2dcf, + 0x2dd7, + 0x2ddf, + 0x2e9a, + 0x3040, + 0x318f, + 0x321f, + 0x32ff, + 0xa78f, + 0xa9ce, + 0xab27, + 0xfb37, + 0xfb3d, + 0xfb3f, + 0xfb42, + 0xfb45, + 0xfe53, + 0xfe67, + 0xfe75, + 0xffe7, +} + +var isPrint32 = []uint32{ + 0x010000, 0x01004d, + 0x010050, 0x01005d, + 0x010080, 0x0100fa, + 0x010100, 0x010102, + 0x010107, 0x010133, + 0x010137, 0x01018a, + 0x010190, 0x01019b, + 0x0101d0, 0x0101fd, + 0x010280, 0x01029c, + 0x0102a0, 0x0102d0, + 0x010300, 0x010323, + 0x010330, 0x01034a, + 0x010380, 0x0103c3, + 0x0103c8, 0x0103d5, + 0x010400, 0x01049d, + 0x0104a0, 0x0104a9, + 0x010800, 0x010805, + 0x010808, 0x010838, + 0x01083c, 0x01083c, + 0x01083f, 0x01085f, + 0x010900, 0x01091b, + 0x01091f, 0x010939, + 0x01093f, 0x01093f, + 0x010a00, 0x010a06, + 0x010a0c, 0x010a33, + 0x010a38, 0x010a3a, + 0x010a3f, 0x010a47, + 0x010a50, 0x010a58, + 0x010a60, 0x010a7f, + 0x010b00, 0x010b35, + 0x010b39, 0x010b55, + 0x010b58, 0x010b72, + 0x010b78, 0x010b7f, + 0x010c00, 0x010c48, + 0x010e60, 0x010e7e, + 0x011000, 0x01104d, + 0x011052, 0x01106f, + 0x011080, 0x0110c1, + 0x012000, 0x01236e, + 0x012400, 0x012462, + 0x012470, 0x012473, + 0x013000, 0x01342e, + 0x016800, 0x016a38, + 0x01b000, 0x01b001, + 0x01d000, 0x01d0f5, + 0x01d100, 0x01d126, + 0x01d129, 0x01d172, + 0x01d17b, 0x01d1dd, + 0x01d200, 0x01d245, + 0x01d300, 0x01d356, + 0x01d360, 0x01d371, + 0x01d400, 0x01d49f, + 0x01d4a2, 0x01d4a2, + 0x01d4a5, 0x01d4a6, + 0x01d4a9, 0x01d50a, + 0x01d50d, 0x01d546, + 0x01d54a, 0x01d6a5, + 0x01d6a8, 0x01d7cb, + 0x01d7ce, 0x01d7ff, + 0x01f000, 0x01f02b, + 0x01f030, 0x01f093, + 0x01f0a0, 0x01f0ae, + 0x01f0b1, 0x01f0be, + 0x01f0c1, 0x01f0df, + 0x01f100, 0x01f10a, + 0x01f110, 0x01f169, + 0x01f170, 0x01f19a, + 0x01f1e6, 0x01f202, + 0x01f210, 0x01f23a, + 0x01f240, 0x01f248, + 0x01f250, 0x01f251, + 0x01f300, 0x01f320, + 0x01f330, 0x01f37c, + 0x01f380, 0x01f393, + 0x01f3a0, 0x01f3ca, + 0x01f3e0, 0x01f3f0, + 0x01f400, 0x01f4fc, + 0x01f500, 0x01f53d, + 0x01f550, 0x01f567, + 0x01f5fb, 0x01f625, + 0x01f628, 0x01f62d, + 0x01f630, 0x01f640, + 0x01f645, 0x01f64f, + 0x01f680, 0x01f6c5, + 0x01f700, 0x01f773, + 0x020000, 0x02a6d6, + 0x02a700, 0x02b734, + 0x02b740, 0x02b81d, + 0x02f800, 0x02fa1d, + 0x0e0100, 0x0e01ef, +} + +var isNotPrint32 = []uint16{ // add 0x10000 to each entry + 0x000c, + 0x0027, + 0x003b, + 0x003e, + 0x031f, + 0x039e, + 0x0809, + 0x0836, + 0x0856, + 0x0a04, + 0x0a14, + 0x0a18, + 0x10bd, + 0xd455, + 0xd49d, + 0xd4ad, + 0xd4ba, + 0xd4bc, + 0xd4c4, + 0xd506, + 0xd515, + 0xd51d, + 0xd53a, + 0xd53f, + 0xd545, + 0xd551, + 0xf0d0, + 0xf12f, + 0xf336, + 0xf3c5, + 0xf43f, + 0xf441, + 0xf4f8, + 0xf600, + 0xf611, + 0xf615, + 0xf617, + 0xf619, + 0xf61b, + 0xf61f, + 0xf62c, + 0xf634, +} diff --git a/src/pkg/strconv/makeisprint.go b/src/pkg/strconv/makeisprint.go new file mode 100644 index 000000000..8a6699bdb --- /dev/null +++ b/src/pkg/strconv/makeisprint.go @@ -0,0 +1,162 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// makeisprint generates the tables for strconv's compact isPrint. +package main + +import ( + "fmt" + "os" + "unicode" +) + +var ( + range16 []uint16 + except16 []uint16 + range32 []uint32 + except32 []uint32 +) + +// bsearch16 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch16 returns len(a). +func bsearch16(a []uint16, x uint16) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// bsearch32 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch32 returns len(a). +func bsearch32(a []uint32, x uint32) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +func isPrint(r rune) bool { + // Same algorithm, either on uint16 or uint32 value. + // First, find first i such that rang[i] >= x. + // This is the index of either the start or end of a pair that might span x. + // The start is even (rang[i&^1]) and the end is odd (rang[i|1]). + // If we find x in a range, make sure x is not in exception list. + + if 0 <= r && r < 1<<16 { + rr, rang, except := uint16(r), range16, except16 + i := bsearch16(rang, rr) + if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr { + return false + } + j := bsearch16(except, rr) + return j >= len(except) || except[j] != rr + } + + rr, rang, except := uint32(r), range32, except32 + i := bsearch32(rang, rr) + if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr { + return false + } + j := bsearch32(except, rr) + return j >= len(except) || except[j] != rr +} + +func scan(min, max rune) (rang, except []uint32) { + lo := rune(-1) + for i := min; ; i++ { + if (i > max || !unicode.IsPrint(i)) && lo >= 0 { + // End range, but avoid flip flop. + if i+1 <= max && unicode.IsPrint(i+1) { + except = append(except, uint32(i)) + continue + } + rang = append(rang, uint32(lo), uint32(i-1)) + lo = -1 + } + if i > max { + break + } + if lo < 0 && unicode.IsPrint(i) { + lo = i + } + } + return +} + +func to16(x []uint32) []uint16 { + var y []uint16 + for _, v := range x { + if uint32(uint16(v)) != v { + panic("bad 32->16 conversion") + } + y = append(y, uint16(v)) + } + return y +} + +func main() { + rang, except := scan(0, 0xFFFF) + range16 = to16(rang) + except16 = to16(except) + range32, except32 = scan(0x10000, unicode.MaxRune) + + for i := rune(0); i <= unicode.MaxRune; i++ { + if isPrint(i) != unicode.IsPrint(i) { + fmt.Fprintf(os.Stderr, "%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i)) + return + } + } + + fmt.Printf("// DO NOT EDIT. GENERATED BY\n") + fmt.Printf("// go run makeisprint.go >x && mv x isprint.go\n\n") + fmt.Printf("package strconv\n\n") + + fmt.Printf("// (%d+%d+%d)*2 + (%d)*4 = %d bytes\n\n", + len(range16), len(except16), len(except32), + len(range32), + (len(range16)+len(except16)+len(except32))*2+ + (len(range32))*4) + + fmt.Printf("var isPrint16 = []uint16{\n") + for i := 0; i < len(range16); i += 2 { + fmt.Printf("\t%#04x, %#04x,\n", range16[i], range16[i+1]) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isNotPrint16 = []uint16{\n") + for _, r := range except16 { + fmt.Printf("\t%#04x,\n", r) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isPrint32 = []uint32{\n") + for i := 0; i < len(range32); i += 2 { + fmt.Printf("\t%#06x, %#06x,\n", range32[i], range32[i+1]) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isNotPrint32 = []uint16{ // add 0x10000 to each entry\n") + for _, r := range except32 { + if r >= 0x20000 { + fmt.Fprintf(os.Stderr, "%U too big for isNotPrint32\n", r) + return + } + fmt.Printf("\t%#04x,\n", r-0x10000) + } + fmt.Printf("}\n") +} diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go index 61dbcae70..8a73f9d3b 100644 --- a/src/pkg/strconv/quote.go +++ b/src/pkg/strconv/quote.go @@ -5,17 +5,15 @@ package strconv import ( - "bytes" - "strings" - "unicode" "unicode/utf8" ) const lowerhex = "0123456789abcdef" func quoteWith(s string, quote byte, ASCIIonly bool) string { - var buf bytes.Buffer - buf.WriteByte(quote) + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + buf = append(buf, quote) for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) width = 1 @@ -23,71 +21,72 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { r, width = utf8.DecodeRuneInString(s) } if width == 1 && r == utf8.RuneError { - buf.WriteString(`\x`) - buf.WriteByte(lowerhex[s[0]>>4]) - buf.WriteByte(lowerhex[s[0]&0xF]) + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[s[0]>>4]) + buf = append(buf, lowerhex[s[0]&0xF]) continue } if r == rune(quote) || r == '\\' { // always backslashed - buf.WriteByte('\\') - buf.WriteByte(byte(r)) + buf = append(buf, '\\') + buf = append(buf, byte(r)) continue } if ASCIIonly { - if r <= unicode.MaxASCII && unicode.IsPrint(r) { - buf.WriteRune(r) + if r < utf8.RuneSelf && IsPrint(r) { + buf = append(buf, byte(r)) continue } - } else if unicode.IsPrint(r) { - buf.WriteRune(r) + } else if IsPrint(r) { + n := utf8.EncodeRune(runeTmp[:], r) + buf = append(buf, runeTmp[:n]...) continue } switch r { case '\a': - buf.WriteString(`\a`) + buf = append(buf, `\a`...) case '\b': - buf.WriteString(`\b`) + buf = append(buf, `\b`...) case '\f': - buf.WriteString(`\f`) + buf = append(buf, `\f`...) case '\n': - buf.WriteString(`\n`) + buf = append(buf, `\n`...) case '\r': - buf.WriteString(`\r`) + buf = append(buf, `\r`...) case '\t': - buf.WriteString(`\t`) + buf = append(buf, `\t`...) case '\v': - buf.WriteString(`\v`) + buf = append(buf, `\v`...) default: switch { case r < ' ': - buf.WriteString(`\x`) - buf.WriteByte(lowerhex[s[0]>>4]) - buf.WriteByte(lowerhex[s[0]&0xF]) - case r > unicode.MaxRune: + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[s[0]>>4]) + buf = append(buf, lowerhex[s[0]&0xF]) + case r > utf8.MaxRune: r = 0xFFFD fallthrough case r < 0x10000: - buf.WriteString(`\u`) + buf = append(buf, `\u`...) for s := 12; s >= 0; s -= 4 { - buf.WriteByte(lowerhex[r>>uint(s)&0xF]) + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } default: - buf.WriteString(`\U`) + buf = append(buf, `\U`...) for s := 28; s >= 0; s -= 4 { - buf.WriteByte(lowerhex[r>>uint(s)&0xF]) + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } } } } - buf.WriteByte(quote) - return buf.String() + buf = append(buf, quote) + return string(buf) } // Quote returns a double-quoted Go string literal representing s. The // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for // control characters and non-printable characters as defined by -// unicode.IsPrint. +// IsPrint. func Quote(s string) string { return quoteWith(s, '"', false) } @@ -100,8 +99,7 @@ func AppendQuote(dst []byte, s string) []byte { // QuoteToASCII returns a double-quoted Go string literal representing s. // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for -// non-ASCII characters and non-printable characters as defined by -// unicode.IsPrint. +// non-ASCII characters and non-printable characters as defined by IsPrint. func QuoteToASCII(s string) string { return quoteWith(s, '"', true) } @@ -114,8 +112,7 @@ func AppendQuoteToASCII(dst []byte, s string) []byte { // QuoteRune returns a single-quoted Go character literal representing the // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) -// for control characters and non-printable characters as defined by -// unicode.IsPrint. +// for control characters and non-printable characters as defined by IsPrint. func QuoteRune(r rune) string { // TODO: avoid the allocation here. return quoteWith(string(r), '\'', false) @@ -130,7 +127,7 @@ func AppendQuoteRune(dst []byte, r rune) []byte { // QuoteRuneToASCII returns a single-quoted Go character literal representing // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, // \u0100) for non-ASCII characters and non-printable characters as defined -// by unicode.IsPrint. +// by IsPrint. func QuoteRuneToASCII(r rune) string { // TODO: avoid the allocation here. return quoteWith(string(r), '\'', true) @@ -245,7 +242,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, value = v break } - if v > unicode.MaxRune { + if v > utf8.MaxRune { err = ErrSyntax return } @@ -304,7 +301,7 @@ func Unquote(s string) (t string, err error) { s = s[1 : n-1] if quote == '`' { - if strings.Contains(s, "`") { + if contains(s, '`') { return "", ErrSyntax } return s, nil @@ -312,12 +309,12 @@ func Unquote(s string) (t string, err error) { if quote != '"' && quote != '\'' { return "", ErrSyntax } - if strings.Index(s, "\n") >= 0 { + if contains(s, '\n') { return "", ErrSyntax } // Is it trivial? Avoid allocation. - if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 { + if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"': return s, nil @@ -329,7 +326,8 @@ func Unquote(s string) (t string, err error) { } } - var buf bytes.Buffer + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := UnquoteChar(s, quote) if err != nil { @@ -337,14 +335,107 @@ func Unquote(s string) (t string, err error) { } s = ss if c < utf8.RuneSelf || !multibyte { - buf.WriteByte(byte(c)) + buf = append(buf, byte(c)) } else { - buf.WriteString(string(c)) + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", ErrSyntax } } - return buf.String(), nil + return string(buf), nil +} + +// contains reports whether the string contains the byte c. +func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +// bsearch16 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch16 returns len(a). +func bsearch16(a []uint16, x uint16) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// bsearch32 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch32 returns len(a). +func bsearch32(a []uint32, x uint32) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests +// to give the same answer. It allows this package not to depend on unicode, +// and therefore not pull in all the Unicode tables. If the linker were better +// at tossing unused tables, we could get rid of this implementation. +// That would be nice. + +// IsPrint reports whether the rune is defined as printable by Go, with +// the same definition as unicode.IsPrint: letters, numbers, punctuation, +// symbols and ASCII space. +func IsPrint(r rune) bool { + // Fast check for Latin-1 + if r <= 0xFF { + if 0x20 <= r && r <= 0x7E { + // All the ASCII is printable from space through DEL-1. + return true + } + if 0xA1 <= r && r <= 0xFF { + // Similarly for ¡ through ÿ... + return r != 0xAD // ...except for the bizarre soft hyphen. + } + return false + } + + // Same algorithm, either on uint16 or uint32 value. + // First, find first i such that isPrint[i] >= x. + // This is the index of either the start or end of a pair that might span x. + // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). + // If we find x in a range, make sure x is not in isNotPrint list. + + if 0 <= r && r < 1<<16 { + rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 + i := bsearch16(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + j := bsearch16(isNotPrint, rr) + return j >= len(isNotPrint) || isNotPrint[j] != rr + } + + rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 + i := bsearch32(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + if r >= 0x20000 { + return true + } + r -= 0x10000 + j := bsearch16(isNotPrint, uint16(r)) + return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) } diff --git a/src/pkg/strconv/quote_test.go b/src/pkg/strconv/quote_test.go index 3f544c43c..61d9bf9a5 100644 --- a/src/pkg/strconv/quote_test.go +++ b/src/pkg/strconv/quote_test.go @@ -7,8 +7,23 @@ package strconv_test import ( . "strconv" "testing" + "unicode" ) +// Verify that our isPrint agrees with unicode.IsPrint +func TestIsPrint(t *testing.T) { + n := 0 + for r := rune(0); r <= unicode.MaxRune; r++ { + if IsPrint(r) != unicode.IsPrint(r) { + t.Errorf("IsPrint(%U)=%t incorrect", r, IsPrint(r)) + n++ + if n > 10 { + return + } + } + } +} + type quoteTest struct { in string out string |