| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package strconv |
| |
| import ( |
| "bytes" |
| "os" |
| "strings" |
| "unicode" |
| "utf8" |
| ) |
| |
// lowerhex lists the hexadecimal digits in ascending order, so indexing it
// with a value in the range 0-15 yields that value's lowercase hex digit.
const lowerhex = "0123456789abcdef"
| |
| // Quote returns a double-quoted Go string literal |
| // representing s. The returned string s uses Go escape |
| // sequences (\t, \n, \xFF, \u0100) for control characters |
| // and non-ASCII characters. |
| func Quote(s string) string { |
| var buf bytes.Buffer |
| buf.WriteByte('"') |
| for ; len(s) > 0; s = s[1:] { |
| switch c := s[0]; { |
| case c == '"': |
| buf.WriteString(`\"`) |
| case c == '\\': |
| buf.WriteString(`\\`) |
| case ' ' <= c && c <= '~': |
| buf.WriteString(string(c)) |
| case c == '\a': |
| buf.WriteString(`\a`) |
| case c == '\b': |
| buf.WriteString(`\b`) |
| case c == '\f': |
| buf.WriteString(`\f`) |
| case c == '\n': |
| buf.WriteString(`\n`) |
| case c == '\r': |
| buf.WriteString(`\r`) |
| case c == '\t': |
| buf.WriteString(`\t`) |
| case c == '\v': |
| buf.WriteString(`\v`) |
| |
| case c >= utf8.RuneSelf && utf8.FullRuneInString(s): |
| r, size := utf8.DecodeRuneInString(s) |
| if r == utf8.RuneError && size == 1 { |
| goto EscX |
| } |
| s = s[size-1:] // next iteration will slice off 1 more |
| if r < 0x10000 { |
| buf.WriteString(`\u`) |
| for j := uint(0); j < 4; j++ { |
| buf.WriteByte(lowerhex[(r>>(12-4*j))&0xF]) |
| } |
| } else { |
| buf.WriteString(`\U`) |
| for j := uint(0); j < 8; j++ { |
| buf.WriteByte(lowerhex[(r>>(28-4*j))&0xF]) |
| } |
| } |
| |
| default: |
| EscX: |
| buf.WriteString(`\x`) |
| buf.WriteByte(lowerhex[c>>4]) |
| buf.WriteByte(lowerhex[c&0xF]) |
| } |
| } |
| buf.WriteByte('"') |
| return buf.String() |
| } |
| |
// CanBackquote reports whether s could be written as a Go raw string
// literal, i.e. placed between backquotes unchanged. That is the case
// when s holds no backquote and no byte below space other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		b := s[0]
		s = s[1:]
		switch {
		case b == '`':
			// Would terminate the literal early.
			return false
		case b == '\t':
			// Tab is the one control character that is allowed.
		case b < ' ':
			return false
		}
	}
	return true
}
| |
// unhex converts a single hexadecimal digit, in either case, to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v int, ok bool) {
	if b >= '0' && b <= '9' {
		return int(b - '0'), true
	}
	if b >= 'a' && b <= 'f' {
		return int(b-'a') + 10, true
	}
	if b >= 'A' && b <= 'F' {
		return int(b-'A') + 10, true
	}
	return 0, false
}
| |
| // UnquoteChar decodes the first character or byte in the escaped string |
| // or character literal represented by the string s. |
| // It returns four values: |
| // |
| // 1) value, the decoded Unicode code point or byte value; |
| // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; |
| // 3) tail, the remainder of the string after the character; and |
| // 4) an error that will be nil if the character is syntactically valid. |
| // |
| // The second argument, quote, specifies the type of literal being parsed |
| // and therefore which escaped quote character is permitted. |
| // If set to a single quote, it permits the sequence \' and disallows unescaped '. |
| // If set to a double quote, it permits \" and disallows unescaped ". |
| // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. |
| func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) { |
| // easy cases |
| switch c := s[0]; { |
| case c == quote && (quote == '\'' || quote == '"'): |
| err = os.EINVAL |
| return |
| case c >= utf8.RuneSelf: |
| r, size := utf8.DecodeRuneInString(s) |
| return r, true, s[size:], nil |
| case c != '\\': |
| return int(s[0]), false, s[1:], nil |
| } |
| |
| // hard case: c is backslash |
| if len(s) <= 1 { |
| err = os.EINVAL |
| return |
| } |
| c := s[1] |
| s = s[2:] |
| |
| switch c { |
| case 'a': |
| value = '\a' |
| case 'b': |
| value = '\b' |
| case 'f': |
| value = '\f' |
| case 'n': |
| value = '\n' |
| case 'r': |
| value = '\r' |
| case 't': |
| value = '\t' |
| case 'v': |
| value = '\v' |
| case 'x', 'u', 'U': |
| n := 0 |
| switch c { |
| case 'x': |
| n = 2 |
| case 'u': |
| n = 4 |
| case 'U': |
| n = 8 |
| } |
| v := 0 |
| if len(s) < n { |
| err = os.EINVAL |
| return |
| } |
| for j := 0; j < n; j++ { |
| x, ok := unhex(s[j]) |
| if !ok { |
| err = os.EINVAL |
| return |
| } |
| v = v<<4 | x |
| } |
| s = s[n:] |
| if c == 'x' { |
| // single-byte string, possibly not UTF-8 |
| value = v |
| break |
| } |
| if v > unicode.MaxRune { |
| err = os.EINVAL |
| return |
| } |
| value = v |
| multibyte = true |
| case '0', '1', '2', '3', '4', '5', '6', '7': |
| v := int(c) - '0' |
| if len(s) < 2 { |
| err = os.EINVAL |
| return |
| } |
| for j := 0; j < 2; j++ { // one digit already; two more |
| x := int(s[j]) - '0' |
| if x < 0 || x > 7 { |
| return |
| } |
| v = (v << 3) | x |
| } |
| s = s[2:] |
| if v > 255 { |
| err = os.EINVAL |
| return |
| } |
| value = v |
| case '\\': |
| value = '\\' |
| case '\'', '"': |
| if c != quote { |
| err = os.EINVAL |
| return |
| } |
| value = int(c) |
| default: |
| err = os.EINVAL |
| return |
| } |
| tail = s |
| return |
| } |
| |
| // Unquote interprets s as a single-quoted, double-quoted, |
| // or backquoted Go string literal, returning the string value |
| // that s quotes. (If s is single-quoted, it would be a Go |
| // character literal; Unquote returns the corresponding |
| // one-character string.) |
| func Unquote(s string) (t string, err os.Error) { |
| n := len(s) |
| if n < 2 { |
| return "", os.EINVAL |
| } |
| quote := s[0] |
| if quote != s[n-1] { |
| return "", os.EINVAL |
| } |
| s = s[1 : n-1] |
| |
| if quote == '`' { |
| if strings.Contains(s, "`") { |
| return "", os.EINVAL |
| } |
| return s, nil |
| } |
| if quote != '"' && quote != '\'' { |
| return "", os.EINVAL |
| } |
| |
| var buf bytes.Buffer |
| for len(s) > 0 { |
| c, multibyte, ss, err := UnquoteChar(s, quote) |
| if err != nil { |
| return "", err |
| } |
| s = ss |
| if c < utf8.RuneSelf || !multibyte { |
| buf.WriteByte(byte(c)) |
| } else { |
| buf.WriteString(string(c)) |
| } |
| if quote == '\'' && len(s) != 0 { |
| // single-quoted must be single character |
| return "", os.EINVAL |
| } |
| } |
| return buf.String(), nil |
| } |