// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
//go:generate go run makeisprint.go -output isprint.go
| 6 | |
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 7 | package strconv |
| 8 | |
| 9 | import ( |
Rob Pike | 45e3bcb | 2011-11-08 15:41:54 -0800 | [diff] [blame] | 10 | "unicode/utf8" |
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 11 | ) |
| 12 | |
// lowerhex is indexed by a 4-bit value (0-15) and yields the matching
// lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 14 | |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 15 | func quoteWith(s string, quote byte, ASCIIonly bool) string { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 16 | var runeTmp [utf8.UTFMax]byte |
| 17 | buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. |
| 18 | buf = append(buf, quote) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 19 | for width := 0; len(s) > 0; s = s[width:] { |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 20 | r := rune(s[0]) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 21 | width = 1 |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 22 | if r >= utf8.RuneSelf { |
| 23 | r, width = utf8.DecodeRuneInString(s) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 24 | } |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 25 | if width == 1 && r == utf8.RuneError { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 26 | buf = append(buf, `\x`...) |
| 27 | buf = append(buf, lowerhex[s[0]>>4]) |
| 28 | buf = append(buf, lowerhex[s[0]&0xF]) |
Russ Cox | 21e75da | 2011-06-17 06:07:13 -0400 | [diff] [blame] | 29 | continue |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 30 | } |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 31 | if r == rune(quote) || r == '\\' { // always backslashed |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 32 | buf = append(buf, '\\') |
| 33 | buf = append(buf, byte(r)) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 34 | continue |
| 35 | } |
| 36 | if ASCIIonly { |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 37 | if r < utf8.RuneSelf && IsPrint(r) { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 38 | buf = append(buf, byte(r)) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 39 | continue |
| 40 | } |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 41 | } else if IsPrint(r) { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 42 | n := utf8.EncodeRune(runeTmp[:], r) |
| 43 | buf = append(buf, runeTmp[:n]...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 44 | continue |
| 45 | } |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 46 | switch r { |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 47 | case '\a': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 48 | buf = append(buf, `\a`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 49 | case '\b': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 50 | buf = append(buf, `\b`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 51 | case '\f': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 52 | buf = append(buf, `\f`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 53 | case '\n': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 54 | buf = append(buf, `\n`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 55 | case '\r': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 56 | buf = append(buf, `\r`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 57 | case '\t': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 58 | buf = append(buf, `\t`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 59 | case '\v': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 60 | buf = append(buf, `\v`...) |
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 61 | default: |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 62 | switch { |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 63 | case r < ' ': |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 64 | buf = append(buf, `\x`...) |
| 65 | buf = append(buf, lowerhex[s[0]>>4]) |
| 66 | buf = append(buf, lowerhex[s[0]&0xF]) |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 67 | case r > utf8.MaxRune: |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 68 | r = 0xFFFD |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 69 | fallthrough |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 70 | case r < 0x10000: |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 71 | buf = append(buf, `\u`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 72 | for s := 12; s >= 0; s -= 4 { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 73 | buf = append(buf, lowerhex[r>>uint(s)&0xF]) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 74 | } |
| 75 | default: |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 76 | buf = append(buf, `\U`...) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 77 | for s := 28; s >= 0; s -= 4 { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 78 | buf = append(buf, lowerhex[r>>uint(s)&0xF]) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 79 | } |
| 80 | } |
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 81 | } |
| 82 | } |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 83 | buf = append(buf, quote) |
| 84 | return string(buf) |
Rob Pike | c4918db | 2011-05-25 15:04:07 +1000 | [diff] [blame] | 85 | |
| 86 | } |
| 87 | |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 88 | // Quote returns a double-quoted Go string literal representing s. The |
| 89 | // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for |
| 90 | // control characters and non-printable characters as defined by |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 91 | // IsPrint. |
Rob Pike | c4918db | 2011-05-25 15:04:07 +1000 | [diff] [blame] | 92 | func Quote(s string) string { |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 93 | return quoteWith(s, '"', false) |
Rob Pike | c4918db | 2011-05-25 15:04:07 +1000 | [diff] [blame] | 94 | } |
| 95 | |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 96 | // AppendQuote appends a double-quoted Go string literal representing s, |
| 97 | // as generated by Quote, to dst and returns the extended buffer. |
| 98 | func AppendQuote(dst []byte, s string) []byte { |
| 99 | return append(dst, Quote(s)...) |
| 100 | } |
| 101 | |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 102 | // QuoteToASCII returns a double-quoted Go string literal representing s. |
| 103 | // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 104 | // non-ASCII characters and non-printable characters as defined by IsPrint. |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 105 | func QuoteToASCII(s string) string { |
| 106 | return quoteWith(s, '"', true) |
| 107 | } |
| 108 | |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 109 | // AppendQuoteToASCII appends a double-quoted Go string literal representing s, |
| 110 | // as generated by QuoteToASCII, to dst and returns the extended buffer. |
| 111 | func AppendQuoteToASCII(dst []byte, s string) []byte { |
| 112 | return append(dst, QuoteToASCII(s)...) |
| 113 | } |
| 114 | |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 115 | // QuoteRune returns a single-quoted Go character literal representing the |
| 116 | // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 117 | // for control characters and non-printable characters as defined by IsPrint. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 118 | func QuoteRune(r rune) string { |
Rob Pike | c4918db | 2011-05-25 15:04:07 +1000 | [diff] [blame] | 119 | // TODO: avoid the allocation here. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 120 | return quoteWith(string(r), '\'', false) |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 121 | } |
| 122 | |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 123 | // AppendQuoteRune appends a single-quoted Go character literal representing the rune, |
| 124 | // as generated by QuoteRune, to dst and returns the extended buffer. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 125 | func AppendQuoteRune(dst []byte, r rune) []byte { |
| 126 | return append(dst, QuoteRune(r)...) |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 127 | } |
| 128 | |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 129 | // QuoteRuneToASCII returns a single-quoted Go character literal representing |
| 130 | // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, |
| 131 | // \u0100) for non-ASCII characters and non-printable characters as defined |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 132 | // by IsPrint. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 133 | func QuoteRuneToASCII(r rune) string { |
Rob Pike | f2f3b8f | 2011-06-07 12:23:08 +0000 | [diff] [blame] | 134 | // TODO: avoid the allocation here. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 135 | return quoteWith(string(r), '\'', true) |
Russ Cox | 387df5e | 2008-11-24 14:51:33 -0800 | [diff] [blame] | 136 | } |
| 137 | |
ChaiShushan | 418e2f6 | 2013-08-10 11:38:42 +1000 | [diff] [blame] | 138 | // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 139 | // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. |
Rob Pike | 38df045 | 2011-12-13 11:13:23 -0800 | [diff] [blame] | 140 | func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { |
| 141 | return append(dst, QuoteRuneToASCII(r)...) |
Russ Cox | efbeaed | 2011-12-05 15:48:21 -0500 | [diff] [blame] | 142 | } |
| 143 | |
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
func CanBackquote(s string) bool {
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]
		switch {
		case size > 1:
			// Multibyte runes are correctly encoded and assumed printable,
			// except the BOM, which is invisible and should not be quoted.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError means the input is not valid UTF-8.
			return false
		case r == '`', r == '\u007F':
			return false
		case r < ' ' && r != '\t':
			return false
		}
	}
	return true
}
| 166 | |
// unhex converts the ASCII hexadecimal digit b (0-9, a-f or A-F) to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	if '0' <= c && c <= '9' {
		return c - '0', true
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10, true
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10, true
	}
	return 0, false
}
| 179 | |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 180 | // UnquoteChar decodes the first character or byte in the escaped string |
| 181 | // or character literal represented by the string s. |
Russ Cox | 9154943 | 2009-10-07 11:55:06 -0700 | [diff] [blame] | 182 | // It returns four values: |
Robert Griesemer | 3ad995e | 2010-08-31 14:18:20 -0700 | [diff] [blame] | 183 | // |
| 184 | // 1) value, the decoded Unicode code point or byte value; |
| 185 | // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; |
| 186 | // 3) tail, the remainder of the string after the character; and |
| 187 | // 4) an error that will be nil if the character is syntactically valid. |
| 188 | // |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 189 | // The second argument, quote, specifies the type of literal being parsed |
| 190 | // and therefore which escaped quote character is permitted. |
| 191 | // If set to a single quote, it permits the sequence \' and disallows unescaped '. |
| 192 | // If set to a double quote, it permits \" and disallows unescaped ". |
| 193 | // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 194 | func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 195 | // easy cases |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 196 | switch c := s[0]; { |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 197 | case c == quote && (quote == '\'' || quote == '"'): |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 198 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 199 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 200 | case c >= utf8.RuneSelf: |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 201 | r, size := utf8.DecodeRuneInString(s) |
| 202 | return r, true, s[size:], nil |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 203 | case c != '\\': |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 204 | return rune(s[0]), false, s[1:], nil |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 205 | } |
| 206 | |
| 207 | // hard case: c is backslash |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 208 | if len(s) <= 1 { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 209 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 210 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 211 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 212 | c := s[1] |
| 213 | s = s[2:] |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 214 | |
| 215 | switch c { |
| 216 | case 'a': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 217 | value = '\a' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 218 | case 'b': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 219 | value = '\b' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 220 | case 'f': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 221 | value = '\f' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 222 | case 'n': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 223 | value = '\n' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 224 | case 'r': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 225 | value = '\r' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 226 | case 't': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 227 | value = '\t' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 228 | case 'v': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 229 | value = '\v' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 230 | case 'x', 'u', 'U': |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 231 | n := 0 |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 232 | switch c { |
| 233 | case 'x': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 234 | n = 2 |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 235 | case 'u': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 236 | n = 4 |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 237 | case 'U': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 238 | n = 8 |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 239 | } |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 240 | var v rune |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 241 | if len(s) < n { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 242 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 243 | return |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 244 | } |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 245 | for j := 0; j < n; j++ { |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 246 | x, ok := unhex(s[j]) |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 247 | if !ok { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 248 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 249 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 250 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 251 | v = v<<4 | x |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 252 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 253 | s = s[n:] |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 254 | if c == 'x' { |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 255 | // single-byte string, possibly not UTF-8 |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 256 | value = v |
| 257 | break |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 258 | } |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 259 | if v > utf8.MaxRune { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 260 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 261 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 262 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 263 | value = v |
| 264 | multibyte = true |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 265 | case '0', '1', '2', '3', '4', '5', '6', '7': |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 266 | v := rune(c) - '0' |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 267 | if len(s) < 2 { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 268 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 269 | return |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 270 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 271 | for j := 0; j < 2; j++ { // one digit already; two more |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 272 | x := rune(s[j]) - '0' |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 273 | if x < 0 || x > 7 { |
Sameer Ajmani | cbf4f4b | 2012-01-09 19:55:18 -0500 | [diff] [blame] | 274 | err = ErrSyntax |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 275 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 276 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 277 | v = (v << 3) | x |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 278 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 279 | s = s[2:] |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 280 | if v > 255 { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 281 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 282 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 283 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 284 | value = v |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 285 | case '\\': |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 286 | value = '\\' |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 287 | case '\'', '"': |
| 288 | if c != quote { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 289 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 290 | return |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 291 | } |
Russ Cox | b50a847 | 2011-10-25 22:23:54 -0700 | [diff] [blame] | 292 | value = rune(c) |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 293 | default: |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 294 | err = ErrSyntax |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 295 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 296 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 297 | tail = s |
| 298 | return |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 299 | } |
| 300 | |
| 301 | // Unquote interprets s as a single-quoted, double-quoted, |
| 302 | // or backquoted Go string literal, returning the string value |
| 303 | // that s quotes. (If s is single-quoted, it would be a Go |
| 304 | // character literal; Unquote returns the corresponding |
| 305 | // one-character string.) |
Russ Cox | eb69292 | 2011-11-01 22:05:34 -0400 | [diff] [blame] | 306 | func Unquote(s string) (t string, err error) { |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 307 | n := len(s) |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 308 | if n < 2 { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 309 | return "", ErrSyntax |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 310 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 311 | quote := s[0] |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 312 | if quote != s[n-1] { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 313 | return "", ErrSyntax |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 314 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 315 | s = s[1 : n-1] |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 316 | |
| 317 | if quote == '`' { |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 318 | if contains(s, '`') { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 319 | return "", ErrSyntax |
Russ Cox | 7732d80 | 2009-11-01 09:25:55 -0800 | [diff] [blame] | 320 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 321 | return s, nil |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 322 | } |
| 323 | if quote != '"' && quote != '\'' { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 324 | return "", ErrSyntax |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 325 | } |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 326 | if contains(s, '\n') { |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 327 | return "", ErrSyntax |
Russ Cox | ba444d8 | 2011-09-26 13:59:12 -0400 | [diff] [blame] | 328 | } |
| 329 | |
| 330 | // Is it trivial? Avoid allocation. |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 331 | if !contains(s, '\\') && !contains(s, quote) { |
Russ Cox | ba444d8 | 2011-09-26 13:59:12 -0400 | [diff] [blame] | 332 | switch quote { |
| 333 | case '"': |
| 334 | return s, nil |
| 335 | case '\'': |
| 336 | r, size := utf8.DecodeRuneInString(s) |
| 337 | if size == len(s) && (r != utf8.RuneError || size != 1) { |
| 338 | return s, nil |
| 339 | } |
| 340 | } |
| 341 | } |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 342 | |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 343 | var runeTmp [utf8.UTFMax]byte |
| 344 | buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 345 | for len(s) > 0 { |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 346 | c, multibyte, ss, err := UnquoteChar(s, quote) |
Russ Cox | 7732d80 | 2009-11-01 09:25:55 -0800 | [diff] [blame] | 347 | if err != nil { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 348 | return "", err |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 349 | } |
Robert Griesemer | d65a5cc | 2009-12-15 15:40:16 -0800 | [diff] [blame] | 350 | s = ss |
Russ Cox | 7732d80 | 2009-11-01 09:25:55 -0800 | [diff] [blame] | 351 | if c < utf8.RuneSelf || !multibyte { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 352 | buf = append(buf, byte(c)) |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 353 | } else { |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 354 | n := utf8.EncodeRune(runeTmp[:], c) |
| 355 | buf = append(buf, runeTmp[:n]...) |
Russ Cox | 0d77947 | 2009-06-23 16:44:01 -0700 | [diff] [blame] | 356 | } |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 357 | if quote == '\'' && len(s) != 0 { |
| 358 | // single-quoted must be single character |
Russ Cox | c1178aa | 2011-10-27 19:46:31 -0700 | [diff] [blame] | 359 | return "", ErrSyntax |
Russ Cox | 3619f1e | 2009-05-11 14:10:34 -0700 | [diff] [blame] | 360 | } |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 361 | } |
Rob Pike | eab4261 | 2012-03-06 15:25:42 +1100 | [diff] [blame] | 362 | return string(buf), nil |
Russ Cox | 73aadff | 2009-04-13 13:27:39 -0700 | [diff] [blame] | 363 | } |
Russ Cox | e9d5a64 | 2012-03-06 00:36:12 -0500 | [diff] [blame] | 364 | |
// contains reports whether the string contains the byte c.
// The comparison is byte-wise, not rune-wise.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
| 374 | |
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The binary search assumes a is sorted in ascending order.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
| 389 | |
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The binary search assumes a is sorted in ascending order.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
| 404 | |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 405 | // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests |
| 406 | // to give the same answer. It allows this package not to depend on unicode, |
| 407 | // and therefore not pull in all the Unicode tables. If the linker were better |
| 408 | // at tossing unused tables, we could get rid of this implementation. |
| 409 | // That would be nice. |
| 410 | |
| 411 | // IsPrint reports whether the rune is defined as printable by Go, with |
| 412 | // the same definition as unicode.IsPrint: letters, numbers, punctuation, |
| 413 | // symbols and ASCII space. |
| 414 | func IsPrint(r rune) bool { |
| 415 | // Fast check for Latin-1 |
| 416 | if r <= 0xFF { |
| 417 | if 0x20 <= r && r <= 0x7E { |
| 418 | // All the ASCII is printable from space through DEL-1. |
| 419 | return true |
| 420 | } |
| 421 | if 0xA1 <= r && r <= 0xFF { |
| 422 | // Similarly for ¡ through ÿ... |
| 423 | return r != 0xAD // ...except for the bizarre soft hyphen. |
| 424 | } |
| 425 | return false |
| 426 | } |
| 427 | |
Russ Cox | e9d5a64 | 2012-03-06 00:36:12 -0500 | [diff] [blame] | 428 | // Same algorithm, either on uint16 or uint32 value. |
| 429 | // First, find first i such that isPrint[i] >= x. |
| 430 | // This is the index of either the start or end of a pair that might span x. |
| 431 | // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). |
| 432 | // If we find x in a range, make sure x is not in isNotPrint list. |
| 433 | |
| 434 | if 0 <= r && r < 1<<16 { |
| 435 | rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 |
| 436 | i := bsearch16(isPrint, rr) |
| 437 | if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { |
| 438 | return false |
| 439 | } |
| 440 | j := bsearch16(isNotPrint, rr) |
| 441 | return j >= len(isNotPrint) || isNotPrint[j] != rr |
| 442 | } |
| 443 | |
| 444 | rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 |
| 445 | i := bsearch32(isPrint, rr) |
| 446 | if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { |
| 447 | return false |
| 448 | } |
Rob Pike | f91326b | 2012-03-07 13:50:31 +1100 | [diff] [blame] | 449 | if r >= 0x20000 { |
| 450 | return true |
| 451 | } |
| 452 | r -= 0x10000 |
| 453 | j := bsearch16(isNotPrint, uint16(r)) |
| 454 | return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) |
Russ Cox | e9d5a64 | 2012-03-06 00:36:12 -0500 | [diff] [blame] | 455 | } |