summaryrefslogtreecommitdiff
path: root/src/pkg/strconv
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/strconv')
-rw-r--r--src/pkg/strconv/isprint.go521
-rw-r--r--src/pkg/strconv/makeisprint.go162
-rw-r--r--src/pkg/strconv/quote.go181
-rw-r--r--src/pkg/strconv/quote_test.go15
4 files changed, 834 insertions, 45 deletions
diff --git a/src/pkg/strconv/isprint.go b/src/pkg/strconv/isprint.go
new file mode 100644
index 000000000..a03a07bfb
--- /dev/null
+++ b/src/pkg/strconv/isprint.go
@@ -0,0 +1,521 @@
+// DO NOT EDIT. GENERATED BY
+// go run makeisprint.go >x && mv x isprint.go
+
+package strconv
+
+// (474+134+42)*2 + (180)*4 = 2020 bytes
+
+var isPrint16 = []uint16{
+ 0x0020, 0x007e,
+ 0x00a1, 0x0377,
+ 0x037a, 0x037e,
+ 0x0384, 0x0527,
+ 0x0531, 0x0556,
+ 0x0559, 0x058a,
+ 0x0591, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0606, 0x061b,
+ 0x061e, 0x070d,
+ 0x0710, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0830, 0x085b,
+ 0x085e, 0x085e,
+ 0x0900, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09e3,
+ 0x09e6, 0x09fb,
+ 0x0a01, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a39,
+ 0x0a3c, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0ab9,
+ 0x0abc, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0af1,
+ 0x0b01, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b63,
+ 0x0b66, 0x0b77,
+ 0x0b82, 0x0b8a,
+ 0x0b8e, 0x0b95,
+ 0x0b99, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c39,
+ 0x0c3d, 0x0c4d,
+ 0x0c55, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+ 0x0c82, 0x0cb9,
+ 0x0cbc, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0ce3,
+ 0x0ce6, 0x0cf2,
+ 0x0d02, 0x0d3a,
+ 0x0d3d, 0x0d4e,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+ 0x0d82, 0x0d96,
+ 0x0d9a, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e84,
+ 0x0e87, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0ea7,
+ 0x0eaa, 0x0ebd,
+ 0x0ec0, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f6c,
+ 0x0f71, 0x0fda,
+ 0x1000, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x124d,
+ 0x1250, 0x125d,
+ 0x1260, 0x128d,
+ 0x1290, 0x12b5,
+ 0x12b8, 0x12c5,
+ 0x12c8, 0x1315,
+ 0x1318, 0x135a,
+ 0x135d, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1400, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1bc0, 0x1bf3,
+ 0x1bfc, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c7f,
+ 0x1cd0, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfc, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f7d,
+ 0x1f80, 0x1fd3,
+ 0x1fd6, 0x1fef,
+ 0x1ff2, 0x1ffe,
+ 0x2010, 0x2027,
+ 0x2030, 0x205e,
+ 0x2070, 0x2071,
+ 0x2074, 0x209c,
+ 0x20a0, 0x20b9,
+ 0x20d0, 0x20f0,
+ 0x2100, 0x2189,
+ 0x2190, 0x23f3,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2c00, 0x2cf1,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d70,
+ 0x2d7f, 0x2d96,
+ 0x2da0, 0x2e31,
+ 0x2e80, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x31ba,
+ 0x31c0, 0x31e3,
+ 0x31f0, 0x4db5,
+ 0x4dc0, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa4d0, 0xa62b,
+ 0xa640, 0xa673,
+ 0xa67c, 0xa697,
+ 0xa6a0, 0xa6f7,
+ 0xa700, 0xa791,
+ 0xa7a0, 0xa7a9,
+ 0xa7fa, 0xa82b,
+ 0xa830, 0xa839,
+ 0xa840, 0xa877,
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9,
+ 0xa8e0, 0xa8fb,
+ 0xa900, 0xa953,
+ 0xa95f, 0xa97c,
+ 0xa980, 0xa9d9,
+ 0xa9de, 0xa9df,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab2e,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfbc1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe26,
+ 0xfe30, 0xfe6b,
+ 0xfe70, 0xfefc,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffee,
+ 0xfffc, 0xfffd,
+}
+
+var isNotPrint16 = []uint16{
+ 0x00ad,
+ 0x038b,
+ 0x038d,
+ 0x03a2,
+ 0x0560,
+ 0x0588,
+ 0x06dd,
+ 0x083f,
+ 0x0978,
+ 0x0980,
+ 0x0984,
+ 0x09a9,
+ 0x09b1,
+ 0x09de,
+ 0x0a04,
+ 0x0a29,
+ 0x0a31,
+ 0x0a34,
+ 0x0a37,
+ 0x0a3d,
+ 0x0a5d,
+ 0x0a84,
+ 0x0a8e,
+ 0x0a92,
+ 0x0aa9,
+ 0x0ab1,
+ 0x0ab4,
+ 0x0ac6,
+ 0x0aca,
+ 0x0af0,
+ 0x0b04,
+ 0x0b29,
+ 0x0b31,
+ 0x0b34,
+ 0x0b5e,
+ 0x0b84,
+ 0x0b91,
+ 0x0b9b,
+ 0x0b9d,
+ 0x0bc9,
+ 0x0c04,
+ 0x0c0d,
+ 0x0c11,
+ 0x0c29,
+ 0x0c34,
+ 0x0c45,
+ 0x0c49,
+ 0x0c57,
+ 0x0c84,
+ 0x0c8d,
+ 0x0c91,
+ 0x0ca9,
+ 0x0cb4,
+ 0x0cc5,
+ 0x0cc9,
+ 0x0cdf,
+ 0x0cf0,
+ 0x0d04,
+ 0x0d0d,
+ 0x0d11,
+ 0x0d45,
+ 0x0d49,
+ 0x0d84,
+ 0x0db2,
+ 0x0dbc,
+ 0x0dd5,
+ 0x0dd7,
+ 0x0e83,
+ 0x0e89,
+ 0x0e98,
+ 0x0ea0,
+ 0x0ea4,
+ 0x0ea6,
+ 0x0eac,
+ 0x0eba,
+ 0x0ec5,
+ 0x0ec7,
+ 0x0f48,
+ 0x0f98,
+ 0x0fbd,
+ 0x0fcd,
+ 0x1249,
+ 0x1257,
+ 0x1259,
+ 0x1289,
+ 0x12b1,
+ 0x12bf,
+ 0x12c1,
+ 0x12d7,
+ 0x1311,
+ 0x1680,
+ 0x170d,
+ 0x176d,
+ 0x1771,
+ 0x1a5f,
+ 0x1f58,
+ 0x1f5a,
+ 0x1f5c,
+ 0x1f5e,
+ 0x1fb5,
+ 0x1fc5,
+ 0x1fdc,
+ 0x1ff5,
+ 0x208f,
+ 0x2700,
+ 0x27cb,
+ 0x27cd,
+ 0x2c2f,
+ 0x2c5f,
+ 0x2da7,
+ 0x2daf,
+ 0x2db7,
+ 0x2dbf,
+ 0x2dc7,
+ 0x2dcf,
+ 0x2dd7,
+ 0x2ddf,
+ 0x2e9a,
+ 0x3040,
+ 0x318f,
+ 0x321f,
+ 0x32ff,
+ 0xa78f,
+ 0xa9ce,
+ 0xab27,
+ 0xfb37,
+ 0xfb3d,
+ 0xfb3f,
+ 0xfb42,
+ 0xfb45,
+ 0xfe53,
+ 0xfe67,
+ 0xfe75,
+ 0xffe7,
+}
+
+var isPrint32 = []uint32{
+ 0x010000, 0x01004d,
+ 0x010050, 0x01005d,
+ 0x010080, 0x0100fa,
+ 0x010100, 0x010102,
+ 0x010107, 0x010133,
+ 0x010137, 0x01018a,
+ 0x010190, 0x01019b,
+ 0x0101d0, 0x0101fd,
+ 0x010280, 0x01029c,
+ 0x0102a0, 0x0102d0,
+ 0x010300, 0x010323,
+ 0x010330, 0x01034a,
+ 0x010380, 0x0103c3,
+ 0x0103c8, 0x0103d5,
+ 0x010400, 0x01049d,
+ 0x0104a0, 0x0104a9,
+ 0x010800, 0x010805,
+ 0x010808, 0x010838,
+ 0x01083c, 0x01083c,
+ 0x01083f, 0x01085f,
+ 0x010900, 0x01091b,
+ 0x01091f, 0x010939,
+ 0x01093f, 0x01093f,
+ 0x010a00, 0x010a06,
+ 0x010a0c, 0x010a33,
+ 0x010a38, 0x010a3a,
+ 0x010a3f, 0x010a47,
+ 0x010a50, 0x010a58,
+ 0x010a60, 0x010a7f,
+ 0x010b00, 0x010b35,
+ 0x010b39, 0x010b55,
+ 0x010b58, 0x010b72,
+ 0x010b78, 0x010b7f,
+ 0x010c00, 0x010c48,
+ 0x010e60, 0x010e7e,
+ 0x011000, 0x01104d,
+ 0x011052, 0x01106f,
+ 0x011080, 0x0110c1,
+ 0x012000, 0x01236e,
+ 0x012400, 0x012462,
+ 0x012470, 0x012473,
+ 0x013000, 0x01342e,
+ 0x016800, 0x016a38,
+ 0x01b000, 0x01b001,
+ 0x01d000, 0x01d0f5,
+ 0x01d100, 0x01d126,
+ 0x01d129, 0x01d172,
+ 0x01d17b, 0x01d1dd,
+ 0x01d200, 0x01d245,
+ 0x01d300, 0x01d356,
+ 0x01d360, 0x01d371,
+ 0x01d400, 0x01d49f,
+ 0x01d4a2, 0x01d4a2,
+ 0x01d4a5, 0x01d4a6,
+ 0x01d4a9, 0x01d50a,
+ 0x01d50d, 0x01d546,
+ 0x01d54a, 0x01d6a5,
+ 0x01d6a8, 0x01d7cb,
+ 0x01d7ce, 0x01d7ff,
+ 0x01f000, 0x01f02b,
+ 0x01f030, 0x01f093,
+ 0x01f0a0, 0x01f0ae,
+ 0x01f0b1, 0x01f0be,
+ 0x01f0c1, 0x01f0df,
+ 0x01f100, 0x01f10a,
+ 0x01f110, 0x01f169,
+ 0x01f170, 0x01f19a,
+ 0x01f1e6, 0x01f202,
+ 0x01f210, 0x01f23a,
+ 0x01f240, 0x01f248,
+ 0x01f250, 0x01f251,
+ 0x01f300, 0x01f320,
+ 0x01f330, 0x01f37c,
+ 0x01f380, 0x01f393,
+ 0x01f3a0, 0x01f3ca,
+ 0x01f3e0, 0x01f3f0,
+ 0x01f400, 0x01f4fc,
+ 0x01f500, 0x01f53d,
+ 0x01f550, 0x01f567,
+ 0x01f5fb, 0x01f625,
+ 0x01f628, 0x01f62d,
+ 0x01f630, 0x01f640,
+ 0x01f645, 0x01f64f,
+ 0x01f680, 0x01f6c5,
+ 0x01f700, 0x01f773,
+ 0x020000, 0x02a6d6,
+ 0x02a700, 0x02b734,
+ 0x02b740, 0x02b81d,
+ 0x02f800, 0x02fa1d,
+ 0x0e0100, 0x0e01ef,
+}
+
+var isNotPrint32 = []uint16{ // add 0x10000 to each entry
+ 0x000c,
+ 0x0027,
+ 0x003b,
+ 0x003e,
+ 0x031f,
+ 0x039e,
+ 0x0809,
+ 0x0836,
+ 0x0856,
+ 0x0a04,
+ 0x0a14,
+ 0x0a18,
+ 0x10bd,
+ 0xd455,
+ 0xd49d,
+ 0xd4ad,
+ 0xd4ba,
+ 0xd4bc,
+ 0xd4c4,
+ 0xd506,
+ 0xd515,
+ 0xd51d,
+ 0xd53a,
+ 0xd53f,
+ 0xd545,
+ 0xd551,
+ 0xf0d0,
+ 0xf12f,
+ 0xf336,
+ 0xf3c5,
+ 0xf43f,
+ 0xf441,
+ 0xf4f8,
+ 0xf600,
+ 0xf611,
+ 0xf615,
+ 0xf617,
+ 0xf619,
+ 0xf61b,
+ 0xf61f,
+ 0xf62c,
+ 0xf634,
+}
diff --git a/src/pkg/strconv/makeisprint.go b/src/pkg/strconv/makeisprint.go
new file mode 100644
index 000000000..8a6699bdb
--- /dev/null
+++ b/src/pkg/strconv/makeisprint.go
@@ -0,0 +1,162 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// makeisprint generates the tables for strconv's compact isPrint.
+package main
+
+import (
+ "fmt"
+ "os"
+ "unicode"
+)
+
+var (
+ range16 []uint16
+ except16 []uint16
+ range32 []uint32
+ except32 []uint32
+)
+
+// bsearch16 is a lower-bound binary search over the sorted slice a: it
+// returns the first index i with a[i] >= x, or len(a) when there is none.
+func bsearch16(a []uint16, x uint16) int {
+	lo, hi := 0, len(a)
+	for lo < hi {
+		mid := lo + (hi-lo)/2
+		if a[mid] >= x {
+			hi = mid
+			continue
+		}
+		lo = mid + 1
+	}
+	return lo
+}
+
+// bsearch32 is a lower-bound binary search over the sorted slice a: it
+// returns the first index i with a[i] >= x, or len(a) when there is none.
+func bsearch32(a []uint32, x uint32) int {
+	lo, hi := 0, len(a)
+	for lo < hi {
+		mid := lo + (hi-lo)/2
+		if a[mid] >= x {
+			hi = mid
+			continue
+		}
+		lo = mid + 1
+	}
+	return lo
+}
+
+func isPrint(r rune) bool {
+	// The 16-bit and 32-bit tables are searched the same way.
+	// A table is a sorted list of inclusive [lo, hi] pairs, so the first
+	// index k with table[k] >= v lands on either half of the only pair that
+	// could contain v: k&^1 is that pair's lo and k|1 is its hi.
+	// A value inside a pair is printable unless the exception list names it.
+
+	if r >= 0 && r <= 0xFFFF {
+		v := uint16(r)
+		k := bsearch16(range16, v)
+		if k >= len(range16) || v < range16[k&^1] || v > range16[k|1] {
+			return false
+		}
+		e := bsearch16(except16, v)
+		return !(e < len(except16) && except16[e] == v)
+	}
+
+	v := uint32(r)
+	k := bsearch32(range32, v)
+	if k >= len(range32) || v < range32[k&^1] || v > range32[k|1] {
+		return false
+	}
+	e := bsearch32(except32, v)
+	return !(e < len(except32) && except32[e] == v)
+}
+
+func scan(min, max rune) (rang, except []uint32) {
+	start := rune(-1) // first rune of the range being built, or -1 if none
+	for r := min; ; r++ {
+		printable := r <= max && unicode.IsPrint(r)
+		if start >= 0 && !printable { // the open range stops before r...
+			if r+1 <= max && unicode.IsPrint(r+1) { // ...unless r is a lone gap
+				except = append(except, uint32(r))
+				continue
+			}
+			rang = append(rang, uint32(start), uint32(r-1))
+			start = -1
+		}
+		if r > max {
+			break
+		}
+		if start < 0 && printable {
+			start = r
+		}
+	}
+	return
+}
+
+func to16(x []uint32) []uint16 {
+	var out []uint16
+	for i := range x {
+		if x[i] > 0xFFFF { // does not fit in 16 bits
+			panic("bad 32->16 conversion")
+		}
+		out = append(out, uint16(x[i]))
+	}
+	return out
+}
+
+func main() {
+	rang, except := scan(0, 0xFFFF)
+	range16 = to16(rang)
+	except16 = to16(except)
+	range32, except32 = scan(0x10000, unicode.MaxRune)
+	// Check the tables against unicode.IsPrint for every rune before printing anything.
+	for i := rune(0); i <= unicode.MaxRune; i++ {
+		if isPrint(i) != unicode.IsPrint(i) {
+			fmt.Fprintf(os.Stderr, "%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i))
+			os.Exit(1) // nonzero status keeps "&& mv x isprint.go" from installing an empty file
+		}
+	}
+
+	fmt.Printf("// DO NOT EDIT. GENERATED BY\n")
+	fmt.Printf("// go run makeisprint.go >x && mv x isprint.go\n\n")
+	fmt.Printf("package strconv\n\n")
+
+	fmt.Printf("// (%d+%d+%d)*2 + (%d)*4 = %d bytes\n\n",
+		len(range16), len(except16), len(except32),
+		len(range32),
+		(len(range16)+len(except16)+len(except32))*2+
+			(len(range32))*4)
+
+	fmt.Printf("var isPrint16 = []uint16{\n")
+	for i := 0; i < len(range16); i += 2 {
+		fmt.Printf("\t%#04x, %#04x,\n", range16[i], range16[i+1])
+	}
+	fmt.Printf("}\n\n")
+
+	fmt.Printf("var isNotPrint16 = []uint16{\n")
+	for _, r := range except16 {
+		fmt.Printf("\t%#04x,\n", r)
+	}
+	fmt.Printf("}\n\n")
+
+	fmt.Printf("var isPrint32 = []uint32{\n")
+	for i := 0; i < len(range32); i += 2 {
+		fmt.Printf("\t%#06x, %#06x,\n", range32[i], range32[i+1])
+	}
+	fmt.Printf("}\n\n")
+	// Exceptions are written as uint16 offsets from 0x10000, so none may reach U+20000.
+	fmt.Printf("var isNotPrint32 = []uint16{ // add 0x10000 to each entry\n")
+	for _, r := range except32 {
+		if r >= 0x20000 {
+			fmt.Fprintf(os.Stderr, "%U too big for isNotPrint32\n", r)
+			os.Exit(1) // output is already partly written; fail so it is not installed
+		}
+		fmt.Printf("\t%#04x,\n", r-0x10000)
+	}
+	fmt.Printf("}\n")
+}
diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go
index 61dbcae70..8a73f9d3b 100644
--- a/src/pkg/strconv/quote.go
+++ b/src/pkg/strconv/quote.go
@@ -5,17 +5,15 @@
package strconv
import (
- "bytes"
- "strings"
- "unicode"
"unicode/utf8"
)
const lowerhex = "0123456789abcdef"
func quoteWith(s string, quote byte, ASCIIonly bool) string {
- var buf bytes.Buffer
- buf.WriteByte(quote)
+ var runeTmp [utf8.UTFMax]byte
+ buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
+ buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
@@ -23,71 +21,72 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
r, width = utf8.DecodeRuneInString(s)
}
if width == 1 && r == utf8.RuneError {
- buf.WriteString(`\x`)
- buf.WriteByte(lowerhex[s[0]>>4])
- buf.WriteByte(lowerhex[s[0]&0xF])
+ buf = append(buf, `\x`...)
+ buf = append(buf, lowerhex[s[0]>>4])
+ buf = append(buf, lowerhex[s[0]&0xF])
continue
}
if r == rune(quote) || r == '\\' { // always backslashed
- buf.WriteByte('\\')
- buf.WriteByte(byte(r))
+ buf = append(buf, '\\')
+ buf = append(buf, byte(r))
continue
}
if ASCIIonly {
- if r <= unicode.MaxASCII && unicode.IsPrint(r) {
- buf.WriteRune(r)
+ if r < utf8.RuneSelf && IsPrint(r) {
+ buf = append(buf, byte(r))
continue
}
- } else if unicode.IsPrint(r) {
- buf.WriteRune(r)
+ } else if IsPrint(r) {
+ n := utf8.EncodeRune(runeTmp[:], r)
+ buf = append(buf, runeTmp[:n]...)
continue
}
switch r {
case '\a':
- buf.WriteString(`\a`)
+ buf = append(buf, `\a`...)
case '\b':
- buf.WriteString(`\b`)
+ buf = append(buf, `\b`...)
case '\f':
- buf.WriteString(`\f`)
+ buf = append(buf, `\f`...)
case '\n':
- buf.WriteString(`\n`)
+ buf = append(buf, `\n`...)
case '\r':
- buf.WriteString(`\r`)
+ buf = append(buf, `\r`...)
case '\t':
- buf.WriteString(`\t`)
+ buf = append(buf, `\t`...)
case '\v':
- buf.WriteString(`\v`)
+ buf = append(buf, `\v`...)
default:
switch {
case r < ' ':
- buf.WriteString(`\x`)
- buf.WriteByte(lowerhex[s[0]>>4])
- buf.WriteByte(lowerhex[s[0]&0xF])
- case r > unicode.MaxRune:
+ buf = append(buf, `\x`...)
+ buf = append(buf, lowerhex[s[0]>>4])
+ buf = append(buf, lowerhex[s[0]&0xF])
+ case r > utf8.MaxRune:
r = 0xFFFD
fallthrough
case r < 0x10000:
- buf.WriteString(`\u`)
+ buf = append(buf, `\u`...)
for s := 12; s >= 0; s -= 4 {
- buf.WriteByte(lowerhex[r>>uint(s)&0xF])
+ buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
default:
- buf.WriteString(`\U`)
+ buf = append(buf, `\U`...)
for s := 28; s >= 0; s -= 4 {
- buf.WriteByte(lowerhex[r>>uint(s)&0xF])
+ buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
}
}
}
- buf.WriteByte(quote)
- return buf.String()
+ buf = append(buf, quote)
+ return string(buf)
}
// Quote returns a double-quoted Go string literal representing s. The
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// control characters and non-printable characters as defined by
-// unicode.IsPrint.
+// IsPrint.
func Quote(s string) string {
return quoteWith(s, '"', false)
}
@@ -100,8 +99,7 @@ func AppendQuote(dst []byte, s string) []byte {
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
-// non-ASCII characters and non-printable characters as defined by
-// unicode.IsPrint.
+// non-ASCII characters and non-printable characters as defined by IsPrint.
func QuoteToASCII(s string) string {
return quoteWith(s, '"', true)
}
@@ -114,8 +112,7 @@ func AppendQuoteToASCII(dst []byte, s string) []byte {
// QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
-// for control characters and non-printable characters as defined by
-// unicode.IsPrint.
+// for control characters and non-printable characters as defined by IsPrint.
func QuoteRune(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', false)
@@ -130,7 +127,7 @@ func AppendQuoteRune(dst []byte, r rune) []byte {
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
-// by unicode.IsPrint.
+// by IsPrint.
func QuoteRuneToASCII(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', true)
@@ -245,7 +242,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
value = v
break
}
- if v > unicode.MaxRune {
+ if v > utf8.MaxRune {
err = ErrSyntax
return
}
@@ -304,7 +301,7 @@ func Unquote(s string) (t string, err error) {
s = s[1 : n-1]
if quote == '`' {
- if strings.Contains(s, "`") {
+ if contains(s, '`') {
return "", ErrSyntax
}
return s, nil
@@ -312,12 +309,12 @@ func Unquote(s string) (t string, err error) {
if quote != '"' && quote != '\'' {
return "", ErrSyntax
}
- if strings.Index(s, "\n") >= 0 {
+ if contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
- if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 {
+ if !contains(s, '\\') && !contains(s, quote) {
switch quote {
case '"':
return s, nil
@@ -329,7 +326,8 @@ func Unquote(s string) (t string, err error) {
}
}
- var buf bytes.Buffer
+ var runeTmp [utf8.UTFMax]byte
+ buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
c, multibyte, ss, err := UnquoteChar(s, quote)
if err != nil {
@@ -337,14 +335,107 @@ func Unquote(s string) (t string, err error) {
}
s = ss
if c < utf8.RuneSelf || !multibyte {
- buf.WriteByte(byte(c))
+ buf = append(buf, byte(c))
} else {
- buf.WriteString(string(c))
+ n := utf8.EncodeRune(runeTmp[:], c)
+ buf = append(buf, runeTmp[:n]...)
}
if quote == '\'' && len(s) != 0 {
// single-quoted must be single character
return "", ErrSyntax
}
}
- return buf.String(), nil
+ return string(buf), nil
+}
+
+// contains reports whether any byte of s equals c.
+func contains(s string, c byte) bool {
+	i := 0
+	for i < len(s) && s[i] != c {
+		i++ // skip bytes that are not c
+	}
+	// i stops short of len(s) only when s[i] == c.
+	return i < len(s)
+}
+
+// bsearch16 returns the index of the first element of a that is >= x,
+// or len(a) if every element is smaller. The slice must be sorted.
+func bsearch16(a []uint16, x uint16) int {
+	left, right := 0, len(a)
+	for left < right {
+		// Invariant: a[:left] are all < x and a[right:] are all >= x.
+		if mid := left + (right-left)/2; a[mid] >= x {
+			right = mid
+		} else {
+			left = mid + 1
+		}
+	}
+	return left
+}
+
+// bsearch32 returns the index of the first element of a that is >= x,
+// or len(a) if every element is smaller. The slice must be sorted.
+func bsearch32(a []uint32, x uint32) int {
+	left, right := 0, len(a)
+	for left < right {
+		// Invariant: a[:left] are all < x and a[right:] are all >= x.
+		if mid := left + (right-left)/2; a[mid] >= x {
+			right = mid
+		} else {
+			left = mid + 1
+		}
+	}
+	return left
+}
+
+// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
+// to give the same answer. It allows this package not to depend on unicode,
+// and therefore not pull in all the Unicode tables. If the linker were better
+// at tossing unused tables, we could get rid of this implementation.
+// That would be nice.
+
+// IsPrint reports whether Go considers the rune r printable. It gives the
+// same answer as unicode.IsPrint: letters, marks, numbers, punctuation,
+// symbols and the ASCII space are printable; everything else is not.
+func IsPrint(r rune) bool {
+	// Latin-1 (and anything negative) is decided without touching the tables.
+	if r <= 0xFF {
+		switch {
+		case 0x20 <= r && r <= 0x7E:
+			// ASCII space through tilde.
+			return true
+		case 0xA1 <= r:
+			// ¡ through ÿ, minus the soft hyphen U+00AD.
+			return r != 0xAD
+		}
+		return false
+	}
+
+	// The 16-bit and 32-bit tables are searched the same way.
+	// Each isPrint table is a sorted list of inclusive [lo, hi] pairs, so the
+	// first index k whose entry is >= v falls on one half of the only pair
+	// that could hold v: k&^1 is that pair's lo and k|1 is its hi.
+	// A rune inside a pair is printable unless the isNotPrint list names it.
+
+	if r < 1<<16 {
+		v := uint16(r)
+		k := bsearch16(isPrint16, v)
+		if k >= len(isPrint16) || v < isPrint16[k&^1] || v > isPrint16[k|1] {
+			return false
+		}
+		e := bsearch16(isNotPrint16, v)
+		return e >= len(isNotPrint16) || isNotPrint16[e] != v
+	}
+
+	v := uint32(r)
+	k := bsearch32(isPrint32, v)
+	if k >= len(isPrint32) || v < isPrint32[k&^1] || v > isPrint32[k|1] {
+		return false
+	}
+	if v >= 0x20000 {
+		return true
+	}
+	low := uint16(v - 0x10000) // isNotPrint32 stores offsets from 0x10000
+	e := bsearch16(isNotPrint32, low)
+	return e >= len(isNotPrint32) || isNotPrint32[e] != low
+}
diff --git a/src/pkg/strconv/quote_test.go b/src/pkg/strconv/quote_test.go
index 3f544c43c..61d9bf9a5 100644
--- a/src/pkg/strconv/quote_test.go
+++ b/src/pkg/strconv/quote_test.go
@@ -7,8 +7,23 @@ package strconv_test
import (
. "strconv"
"testing"
+ "unicode"
)
+// TestIsPrint checks that IsPrint agrees with unicode.IsPrint for every rune.
+func TestIsPrint(t *testing.T) {
+	failures := 0
+	for r := rune(0); r <= unicode.MaxRune; r++ {
+		if IsPrint(r) == unicode.IsPrint(r) {
+			continue
+		}
+		t.Errorf("IsPrint(%U)=%t incorrect", r, IsPrint(r))
+		if failures++; failures > 10 { // give up after the 11th mismatch
+			return
+		}
+	}
+}
+
type quoteTest struct {
in string
out string