diff options
Diffstat (limited to 'src/pkg/strconv/quote.go')
-rw-r--r-- | src/pkg/strconv/quote.go | 132 |
1 files changed, 86 insertions, 46 deletions
diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go index 05e49d32d..61dbcae70 100644 --- a/src/pkg/strconv/quote.go +++ b/src/pkg/strconv/quote.go @@ -6,10 +6,9 @@ package strconv import ( "bytes" - "os" "strings" "unicode" - "utf8" + "unicode/utf8" ) const lowerhex = "0123456789abcdef" @@ -18,32 +17,32 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { var buf bytes.Buffer buf.WriteByte(quote) for width := 0; len(s) > 0; s = s[width:] { - rune := int(s[0]) + r := rune(s[0]) width = 1 - if rune >= utf8.RuneSelf { - rune, width = utf8.DecodeRuneInString(s) + if r >= utf8.RuneSelf { + r, width = utf8.DecodeRuneInString(s) } - if width == 1 && rune == utf8.RuneError { + if width == 1 && r == utf8.RuneError { buf.WriteString(`\x`) buf.WriteByte(lowerhex[s[0]>>4]) buf.WriteByte(lowerhex[s[0]&0xF]) continue } - if rune == int(quote) || rune == '\\' { // always backslashed + if r == rune(quote) || r == '\\' { // always backslashed buf.WriteByte('\\') - buf.WriteByte(byte(rune)) + buf.WriteByte(byte(r)) continue } if ASCIIonly { - if rune <= unicode.MaxASCII && unicode.IsPrint(rune) { - buf.WriteRune(rune) + if r <= unicode.MaxASCII && unicode.IsPrint(r) { + buf.WriteRune(r) continue } - } else if unicode.IsPrint(rune) { - buf.WriteRune(rune) + } else if unicode.IsPrint(r) { + buf.WriteRune(r) continue } - switch rune { + switch r { case '\a': buf.WriteString(`\a`) case '\b': @@ -60,22 +59,22 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { buf.WriteString(`\v`) default: switch { - case rune < ' ': + case r < ' ': buf.WriteString(`\x`) buf.WriteByte(lowerhex[s[0]>>4]) buf.WriteByte(lowerhex[s[0]&0xF]) - case rune > unicode.MaxRune: - rune = 0xFFFD + case r > unicode.MaxRune: + r = 0xFFFD fallthrough - case rune < 0x10000: + case r < 0x10000: buf.WriteString(`\u`) for s := 12; s >= 0; s -= 4 { - buf.WriteByte(lowerhex[rune>>uint(s)&0xF]) + buf.WriteByte(lowerhex[r>>uint(s)&0xF]) } default: buf.WriteString(`\U`) for s := 28; s >= 
0; s -= 4 { - buf.WriteByte(lowerhex[rune>>uint(s)&0xF]) + buf.WriteByte(lowerhex[r>>uint(s)&0xF]) } } } @@ -93,6 +92,12 @@ func Quote(s string) string { return quoteWith(s, '"', false) } +// AppendQuote appends a double-quoted Go string literal representing s, +// as generated by Quote, to dst and returns the extended buffer. +func AppendQuote(dst []byte, s string) []byte { + return append(dst, Quote(s)...) +} + // QuoteToASCII returns a double-quoted Go string literal representing s. // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for // non-ASCII characters and non-printable characters as defined by @@ -101,22 +106,40 @@ func QuoteToASCII(s string) string { return quoteWith(s, '"', true) } +// AppendQuoteToASCII appends a double-quoted Go string literal representing s, +// as generated by QuoteToASCII, to dst and returns the extended buffer. +func AppendQuoteToASCII(dst []byte, s string) []byte { + return append(dst, QuoteToASCII(s)...) +} + // QuoteRune returns a single-quoted Go character literal representing the // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) // for control characters and non-printable characters as defined by // unicode.IsPrint. -func QuoteRune(rune int) string { +func QuoteRune(r rune) string { // TODO: avoid the allocation here. - return quoteWith(string(rune), '\'', false) + return quoteWith(string(r), '\'', false) +} + +// AppendQuoteRune appends a single-quoted Go character literal representing the rune, +// as generated by QuoteRune, to dst and returns the extended buffer. +func AppendQuoteRune(dst []byte, r rune) []byte { + return append(dst, QuoteRune(r)...) } // QuoteRuneToASCII returns a single-quoted Go character literal representing // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, // \u0100) for non-ASCII characters and non-printable characters as defined // by unicode.IsPrint. 
-func QuoteRuneToASCII(rune int) string { +func QuoteRuneToASCII(r rune) string { // TODO: avoid the allocation here. - return quoteWith(string(rune), '\'', true) + return quoteWith(string(r), '\'', true) +} + +// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, +// as generated by QuoteRuneToASCII, to dst and returns the extended buffer. +func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { + return append(dst, QuoteRuneToASCII(r)...) } // CanBackquote returns whether the string s would be @@ -130,8 +153,8 @@ func CanBackquote(s string) bool { return true } -func unhex(b byte) (v int, ok bool) { - c := int(b) +func unhex(b byte) (v rune, ok bool) { + c := rune(b) switch { case '0' <= c && c <= '9': return c - '0', true @@ -157,22 +180,22 @@ func unhex(b byte) (v int, ok bool) { // If set to a single quote, it permits the sequence \' and disallows unescaped '. // If set to a double quote, it permits \" and disallows unescaped ". // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 
-func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) { +func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { // easy cases switch c := s[0]; { case c == quote && (quote == '\'' || quote == '"'): - err = os.EINVAL + err = ErrSyntax return case c >= utf8.RuneSelf: r, size := utf8.DecodeRuneInString(s) return r, true, s[size:], nil case c != '\\': - return int(s[0]), false, s[1:], nil + return rune(s[0]), false, s[1:], nil } // hard case: c is backslash if len(s) <= 1 { - err = os.EINVAL + err = ErrSyntax return } c := s[1] @@ -203,15 +226,15 @@ func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, case 'U': n = 8 } - v := 0 + var v rune if len(s) < n { - err = os.EINVAL + err = ErrSyntax return } for j := 0; j < n; j++ { x, ok := unhex(s[j]) if !ok { - err = os.EINVAL + err = ErrSyntax return } v = v<<4 | x @@ -223,27 +246,28 @@ func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, break } if v > unicode.MaxRune { - err = os.EINVAL + err = ErrSyntax return } value = v multibyte = true case '0', '1', '2', '3', '4', '5', '6', '7': - v := int(c) - '0' + v := rune(c) - '0' if len(s) < 2 { - err = os.EINVAL + err = ErrSyntax return } for j := 0; j < 2; j++ { // one digit already; two more - x := int(s[j]) - '0' + x := rune(s[j]) - '0' if x < 0 || x > 7 { + err = ErrSyntax return } v = (v << 3) | x } s = s[2:] if v > 255 { - err = os.EINVAL + err = ErrSyntax return } value = v @@ -251,12 +275,12 @@ func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, value = '\\' case '\'', '"': if c != quote { - err = os.EINVAL + err = ErrSyntax return } - value = int(c) + value = rune(c) default: - err = os.EINVAL + err = ErrSyntax return } tail = s @@ -268,25 +292,41 @@ func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, // that s quotes. 
(If s is single-quoted, it would be a Go // character literal; Unquote returns the corresponding // one-character string.) -func Unquote(s string) (t string, err os.Error) { +func Unquote(s string) (t string, err error) { n := len(s) if n < 2 { - return "", os.EINVAL + return "", ErrSyntax } quote := s[0] if quote != s[n-1] { - return "", os.EINVAL + return "", ErrSyntax } s = s[1 : n-1] if quote == '`' { if strings.Contains(s, "`") { - return "", os.EINVAL + return "", ErrSyntax } return s, nil } if quote != '"' && quote != '\'' { - return "", os.EINVAL + return "", ErrSyntax + } + if strings.Index(s, "\n") >= 0 { + return "", ErrSyntax + } + + // Is it trivial? Avoid allocation. + if strings.Index(s, `\`) < 0 && strings.IndexRune(s, rune(quote)) < 0 { + switch quote { + case '"': + return s, nil + case '\'': + r, size := utf8.DecodeRuneInString(s) + if size == len(s) && (r != utf8.RuneError || size != 1) { + return s, nil + } + } } var buf bytes.Buffer @@ -303,7 +343,7 @@ func Unquote(s string) (t string, err os.Error) { } if quote == '\'' && len(s) != 0 { // single-quoted must be single character - return "", os.EINVAL + return "", ErrSyntax } } return buf.String(), nil |