// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run makeisprint.go -output isprint.go

package strconv

import (
	"unicode/utf8"
)

// lowerhex holds the sixteen lowercase hexadecimal digits, indexed by
// nibble value. It is used to format \x, \u and \U escape sequences.
const lowerhex = "0123456789abcdef"

Rob Pikef2f3b8f2011-06-07 12:23:08 +000015func quoteWith(s string, quote byte, ASCIIonly bool) string {
Rob Pikeeab42612012-03-06 15:25:42 +110016 var runeTmp [utf8.UTFMax]byte
17 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
18 buf = append(buf, quote)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000019 for width := 0; len(s) > 0; s = s[width:] {
Russ Coxb50a8472011-10-25 22:23:54 -070020 r := rune(s[0])
Rob Pikef2f3b8f2011-06-07 12:23:08 +000021 width = 1
Russ Coxb50a8472011-10-25 22:23:54 -070022 if r >= utf8.RuneSelf {
23 r, width = utf8.DecodeRuneInString(s)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000024 }
Russ Coxb50a8472011-10-25 22:23:54 -070025 if width == 1 && r == utf8.RuneError {
Rob Pikeeab42612012-03-06 15:25:42 +110026 buf = append(buf, `\x`...)
27 buf = append(buf, lowerhex[s[0]>>4])
28 buf = append(buf, lowerhex[s[0]&0xF])
Russ Cox21e75da2011-06-17 06:07:13 -040029 continue
Rob Pikef2f3b8f2011-06-07 12:23:08 +000030 }
Russ Coxb50a8472011-10-25 22:23:54 -070031 if r == rune(quote) || r == '\\' { // always backslashed
Rob Pikeeab42612012-03-06 15:25:42 +110032 buf = append(buf, '\\')
33 buf = append(buf, byte(r))
Rob Pikef2f3b8f2011-06-07 12:23:08 +000034 continue
35 }
36 if ASCIIonly {
Rob Pikef91326b2012-03-07 13:50:31 +110037 if r < utf8.RuneSelf && IsPrint(r) {
Rob Pikeeab42612012-03-06 15:25:42 +110038 buf = append(buf, byte(r))
Rob Pikef2f3b8f2011-06-07 12:23:08 +000039 continue
40 }
Rob Pikef91326b2012-03-07 13:50:31 +110041 } else if IsPrint(r) {
Rob Pikeeab42612012-03-06 15:25:42 +110042 n := utf8.EncodeRune(runeTmp[:], r)
43 buf = append(buf, runeTmp[:n]...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000044 continue
45 }
Russ Coxb50a8472011-10-25 22:23:54 -070046 switch r {
Rob Pikef2f3b8f2011-06-07 12:23:08 +000047 case '\a':
Rob Pikeeab42612012-03-06 15:25:42 +110048 buf = append(buf, `\a`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000049 case '\b':
Rob Pikeeab42612012-03-06 15:25:42 +110050 buf = append(buf, `\b`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000051 case '\f':
Rob Pikeeab42612012-03-06 15:25:42 +110052 buf = append(buf, `\f`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000053 case '\n':
Rob Pikeeab42612012-03-06 15:25:42 +110054 buf = append(buf, `\n`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000055 case '\r':
Rob Pikeeab42612012-03-06 15:25:42 +110056 buf = append(buf, `\r`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000057 case '\t':
Rob Pikeeab42612012-03-06 15:25:42 +110058 buf = append(buf, `\t`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000059 case '\v':
Rob Pikeeab42612012-03-06 15:25:42 +110060 buf = append(buf, `\v`...)
Russ Cox387df5e2008-11-24 14:51:33 -080061 default:
Rob Pikef2f3b8f2011-06-07 12:23:08 +000062 switch {
Russ Coxb50a8472011-10-25 22:23:54 -070063 case r < ' ':
Rob Pikeeab42612012-03-06 15:25:42 +110064 buf = append(buf, `\x`...)
65 buf = append(buf, lowerhex[s[0]>>4])
66 buf = append(buf, lowerhex[s[0]&0xF])
Rob Pikef91326b2012-03-07 13:50:31 +110067 case r > utf8.MaxRune:
Russ Coxb50a8472011-10-25 22:23:54 -070068 r = 0xFFFD
Rob Pikef2f3b8f2011-06-07 12:23:08 +000069 fallthrough
Russ Coxb50a8472011-10-25 22:23:54 -070070 case r < 0x10000:
Rob Pikeeab42612012-03-06 15:25:42 +110071 buf = append(buf, `\u`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000072 for s := 12; s >= 0; s -= 4 {
Rob Pikeeab42612012-03-06 15:25:42 +110073 buf = append(buf, lowerhex[r>>uint(s)&0xF])
Rob Pikef2f3b8f2011-06-07 12:23:08 +000074 }
75 default:
Rob Pikeeab42612012-03-06 15:25:42 +110076 buf = append(buf, `\U`...)
Rob Pikef2f3b8f2011-06-07 12:23:08 +000077 for s := 28; s >= 0; s -= 4 {
Rob Pikeeab42612012-03-06 15:25:42 +110078 buf = append(buf, lowerhex[r>>uint(s)&0xF])
Rob Pikef2f3b8f2011-06-07 12:23:08 +000079 }
80 }
Russ Cox387df5e2008-11-24 14:51:33 -080081 }
82 }
Rob Pikeeab42612012-03-06 15:25:42 +110083 buf = append(buf, quote)
84 return string(buf)
Rob Pikec4918db2011-05-25 15:04:07 +100085
86}
87
Rob Pikef2f3b8f2011-06-07 12:23:08 +000088// Quote returns a double-quoted Go string literal representing s. The
89// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
90// control characters and non-printable characters as defined by
Rob Pikef91326b2012-03-07 13:50:31 +110091// IsPrint.
Rob Pikec4918db2011-05-25 15:04:07 +100092func Quote(s string) string {
Rob Pikef2f3b8f2011-06-07 12:23:08 +000093 return quoteWith(s, '"', false)
Rob Pikec4918db2011-05-25 15:04:07 +100094}
95
Russ Coxefbeaed2011-12-05 15:48:21 -050096// AppendQuote appends a double-quoted Go string literal representing s,
97// as generated by Quote, to dst and returns the extended buffer.
98func AppendQuote(dst []byte, s string) []byte {
99 return append(dst, Quote(s)...)
100}
101
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000102// QuoteToASCII returns a double-quoted Go string literal representing s.
103// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
Rob Pikef91326b2012-03-07 13:50:31 +1100104// non-ASCII characters and non-printable characters as defined by IsPrint.
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000105func QuoteToASCII(s string) string {
106 return quoteWith(s, '"', true)
107}
108
Russ Coxefbeaed2011-12-05 15:48:21 -0500109// AppendQuoteToASCII appends a double-quoted Go string literal representing s,
110// as generated by QuoteToASCII, to dst and returns the extended buffer.
111func AppendQuoteToASCII(dst []byte, s string) []byte {
112 return append(dst, QuoteToASCII(s)...)
113}
114
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000115// QuoteRune returns a single-quoted Go character literal representing the
116// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
Rob Pikef91326b2012-03-07 13:50:31 +1100117// for control characters and non-printable characters as defined by IsPrint.
Rob Pike38df0452011-12-13 11:13:23 -0800118func QuoteRune(r rune) string {
Rob Pikec4918db2011-05-25 15:04:07 +1000119 // TODO: avoid the allocation here.
Rob Pike38df0452011-12-13 11:13:23 -0800120 return quoteWith(string(r), '\'', false)
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000121}
122
Russ Coxefbeaed2011-12-05 15:48:21 -0500123// AppendQuoteRune appends a single-quoted Go character literal representing the rune,
124// as generated by QuoteRune, to dst and returns the extended buffer.
Rob Pike38df0452011-12-13 11:13:23 -0800125func AppendQuoteRune(dst []byte, r rune) []byte {
126 return append(dst, QuoteRune(r)...)
Russ Coxefbeaed2011-12-05 15:48:21 -0500127}
128
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000129// QuoteRuneToASCII returns a single-quoted Go character literal representing
130// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
131// \u0100) for non-ASCII characters and non-printable characters as defined
Rob Pikef91326b2012-03-07 13:50:31 +1100132// by IsPrint.
Rob Pike38df0452011-12-13 11:13:23 -0800133func QuoteRuneToASCII(r rune) string {
Rob Pikef2f3b8f2011-06-07 12:23:08 +0000134 // TODO: avoid the allocation here.
Rob Pike38df0452011-12-13 11:13:23 -0800135 return quoteWith(string(r), '\'', true)
Russ Cox387df5e2008-11-24 14:51:33 -0800136}
137
ChaiShushan418e2f62013-08-10 11:38:42 +1000138// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
Russ Coxefbeaed2011-12-05 15:48:21 -0500139// as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
Rob Pike38df0452011-12-13 11:13:23 -0800140func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
141 return append(dst, QuoteRuneToASCII(r)...)
Russ Coxefbeaed2011-12-05 15:48:21 -0500142}
143
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
//
// It returns false if s contains a backquote, DEL (U+007F), an ASCII
// control character other than tab, a byte order mark (U+FEFF), or a byte
// that is not valid UTF-8.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		if r == utf8.RuneError {
			// A width of 1 with RuneError means an invalid UTF-8 byte.
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}

// unhex converts the ASCII hexadecimal digit b (either case) to its
// numeric value. ok is false, and v is zero, if b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return
}

Russ Cox0d779472009-06-23 16:44:01 -0700180// UnquoteChar decodes the first character or byte in the escaped string
181// or character literal represented by the string s.
Russ Cox91549432009-10-07 11:55:06 -0700182// It returns four values:
Robert Griesemer3ad995e2010-08-31 14:18:20 -0700183//
184// 1) value, the decoded Unicode code point or byte value;
185// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
186// 3) tail, the remainder of the string after the character; and
187// 4) an error that will be nil if the character is syntactically valid.
188//
Russ Cox0d779472009-06-23 16:44:01 -0700189// The second argument, quote, specifies the type of literal being parsed
190// and therefore which escaped quote character is permitted.
191// If set to a single quote, it permits the sequence \' and disallows unescaped '.
192// If set to a double quote, it permits \" and disallows unescaped ".
193// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
Russ Coxeb692922011-11-01 22:05:34 -0400194func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
Russ Cox73aadff2009-04-13 13:27:39 -0700195 // easy cases
Russ Cox3619f1e2009-05-11 14:10:34 -0700196 switch c := s[0]; {
Russ Cox0d779472009-06-23 16:44:01 -0700197 case c == quote && (quote == '\'' || quote == '"'):
Russ Coxc1178aa2011-10-27 19:46:31 -0700198 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800199 return
Russ Cox73aadff2009-04-13 13:27:39 -0700200 case c >= utf8.RuneSelf:
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800201 r, size := utf8.DecodeRuneInString(s)
202 return r, true, s[size:], nil
Russ Cox73aadff2009-04-13 13:27:39 -0700203 case c != '\\':
Russ Coxb50a8472011-10-25 22:23:54 -0700204 return rune(s[0]), false, s[1:], nil
Russ Cox73aadff2009-04-13 13:27:39 -0700205 }
206
207 // hard case: c is backslash
Russ Cox3619f1e2009-05-11 14:10:34 -0700208 if len(s) <= 1 {
Russ Coxc1178aa2011-10-27 19:46:31 -0700209 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800210 return
Russ Cox73aadff2009-04-13 13:27:39 -0700211 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800212 c := s[1]
213 s = s[2:]
Russ Cox73aadff2009-04-13 13:27:39 -0700214
215 switch c {
216 case 'a':
Robert Griesemer40621d52009-11-09 12:07:39 -0800217 value = '\a'
Russ Cox73aadff2009-04-13 13:27:39 -0700218 case 'b':
Robert Griesemer40621d52009-11-09 12:07:39 -0800219 value = '\b'
Russ Cox73aadff2009-04-13 13:27:39 -0700220 case 'f':
Robert Griesemer40621d52009-11-09 12:07:39 -0800221 value = '\f'
Russ Cox73aadff2009-04-13 13:27:39 -0700222 case 'n':
Robert Griesemer40621d52009-11-09 12:07:39 -0800223 value = '\n'
Russ Cox73aadff2009-04-13 13:27:39 -0700224 case 'r':
Robert Griesemer40621d52009-11-09 12:07:39 -0800225 value = '\r'
Russ Cox73aadff2009-04-13 13:27:39 -0700226 case 't':
Robert Griesemer40621d52009-11-09 12:07:39 -0800227 value = '\t'
Russ Cox73aadff2009-04-13 13:27:39 -0700228 case 'v':
Robert Griesemer40621d52009-11-09 12:07:39 -0800229 value = '\v'
Russ Cox73aadff2009-04-13 13:27:39 -0700230 case 'x', 'u', 'U':
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800231 n := 0
Russ Cox73aadff2009-04-13 13:27:39 -0700232 switch c {
233 case 'x':
Robert Griesemer40621d52009-11-09 12:07:39 -0800234 n = 2
Russ Cox73aadff2009-04-13 13:27:39 -0700235 case 'u':
Robert Griesemer40621d52009-11-09 12:07:39 -0800236 n = 4
Russ Cox73aadff2009-04-13 13:27:39 -0700237 case 'U':
Robert Griesemer40621d52009-11-09 12:07:39 -0800238 n = 8
Russ Cox73aadff2009-04-13 13:27:39 -0700239 }
Russ Coxb50a8472011-10-25 22:23:54 -0700240 var v rune
Russ Cox3619f1e2009-05-11 14:10:34 -0700241 if len(s) < n {
Russ Coxc1178aa2011-10-27 19:46:31 -0700242 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800243 return
Russ Cox3619f1e2009-05-11 14:10:34 -0700244 }
Russ Cox73aadff2009-04-13 13:27:39 -0700245 for j := 0; j < n; j++ {
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800246 x, ok := unhex(s[j])
Russ Cox73aadff2009-04-13 13:27:39 -0700247 if !ok {
Russ Coxc1178aa2011-10-27 19:46:31 -0700248 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800249 return
Russ Cox73aadff2009-04-13 13:27:39 -0700250 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800251 v = v<<4 | x
Russ Cox73aadff2009-04-13 13:27:39 -0700252 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800253 s = s[n:]
Russ Cox73aadff2009-04-13 13:27:39 -0700254 if c == 'x' {
Russ Cox3619f1e2009-05-11 14:10:34 -0700255 // single-byte string, possibly not UTF-8
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800256 value = v
257 break
Russ Cox73aadff2009-04-13 13:27:39 -0700258 }
Rob Pikef91326b2012-03-07 13:50:31 +1100259 if v > utf8.MaxRune {
Russ Coxc1178aa2011-10-27 19:46:31 -0700260 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800261 return
Russ Cox73aadff2009-04-13 13:27:39 -0700262 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800263 value = v
264 multibyte = true
Russ Cox73aadff2009-04-13 13:27:39 -0700265 case '0', '1', '2', '3', '4', '5', '6', '7':
Russ Coxb50a8472011-10-25 22:23:54 -0700266 v := rune(c) - '0'
Russ Cox3619f1e2009-05-11 14:10:34 -0700267 if len(s) < 2 {
Russ Coxc1178aa2011-10-27 19:46:31 -0700268 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800269 return
Russ Cox3619f1e2009-05-11 14:10:34 -0700270 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800271 for j := 0; j < 2; j++ { // one digit already; two more
Russ Coxb50a8472011-10-25 22:23:54 -0700272 x := rune(s[j]) - '0'
Russ Cox73aadff2009-04-13 13:27:39 -0700273 if x < 0 || x > 7 {
Sameer Ajmanicbf4f4b2012-01-09 19:55:18 -0500274 err = ErrSyntax
Robert Griesemer40621d52009-11-09 12:07:39 -0800275 return
Russ Cox73aadff2009-04-13 13:27:39 -0700276 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800277 v = (v << 3) | x
Russ Cox73aadff2009-04-13 13:27:39 -0700278 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800279 s = s[2:]
Russ Cox73aadff2009-04-13 13:27:39 -0700280 if v > 255 {
Russ Coxc1178aa2011-10-27 19:46:31 -0700281 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800282 return
Russ Cox73aadff2009-04-13 13:27:39 -0700283 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800284 value = v
Russ Cox0d779472009-06-23 16:44:01 -0700285 case '\\':
Robert Griesemer40621d52009-11-09 12:07:39 -0800286 value = '\\'
Russ Cox0d779472009-06-23 16:44:01 -0700287 case '\'', '"':
288 if c != quote {
Russ Coxc1178aa2011-10-27 19:46:31 -0700289 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800290 return
Russ Cox0d779472009-06-23 16:44:01 -0700291 }
Russ Coxb50a8472011-10-25 22:23:54 -0700292 value = rune(c)
Russ Cox0d779472009-06-23 16:44:01 -0700293 default:
Russ Coxc1178aa2011-10-27 19:46:31 -0700294 err = ErrSyntax
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800295 return
Russ Cox73aadff2009-04-13 13:27:39 -0700296 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800297 tail = s
298 return
Russ Cox73aadff2009-04-13 13:27:39 -0700299}
300
301// Unquote interprets s as a single-quoted, double-quoted,
302// or backquoted Go string literal, returning the string value
303// that s quotes. (If s is single-quoted, it would be a Go
304// character literal; Unquote returns the corresponding
305// one-character string.)
Russ Coxeb692922011-11-01 22:05:34 -0400306func Unquote(s string) (t string, err error) {
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800307 n := len(s)
Russ Cox3619f1e2009-05-11 14:10:34 -0700308 if n < 2 {
Russ Coxc1178aa2011-10-27 19:46:31 -0700309 return "", ErrSyntax
Russ Cox3619f1e2009-05-11 14:10:34 -0700310 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800311 quote := s[0]
Russ Cox3619f1e2009-05-11 14:10:34 -0700312 if quote != s[n-1] {
Russ Coxc1178aa2011-10-27 19:46:31 -0700313 return "", ErrSyntax
Russ Cox3619f1e2009-05-11 14:10:34 -0700314 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800315 s = s[1 : n-1]
Russ Cox3619f1e2009-05-11 14:10:34 -0700316
317 if quote == '`' {
Rob Pikef91326b2012-03-07 13:50:31 +1100318 if contains(s, '`') {
Russ Coxc1178aa2011-10-27 19:46:31 -0700319 return "", ErrSyntax
Russ Cox7732d802009-11-01 09:25:55 -0800320 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800321 return s, nil
Russ Cox3619f1e2009-05-11 14:10:34 -0700322 }
323 if quote != '"' && quote != '\'' {
Russ Coxc1178aa2011-10-27 19:46:31 -0700324 return "", ErrSyntax
Russ Cox73aadff2009-04-13 13:27:39 -0700325 }
Rob Pikef91326b2012-03-07 13:50:31 +1100326 if contains(s, '\n') {
Russ Coxc1178aa2011-10-27 19:46:31 -0700327 return "", ErrSyntax
Russ Coxba444d82011-09-26 13:59:12 -0400328 }
329
330 // Is it trivial? Avoid allocation.
Rob Pikef91326b2012-03-07 13:50:31 +1100331 if !contains(s, '\\') && !contains(s, quote) {
Russ Coxba444d82011-09-26 13:59:12 -0400332 switch quote {
333 case '"':
334 return s, nil
335 case '\'':
336 r, size := utf8.DecodeRuneInString(s)
337 if size == len(s) && (r != utf8.RuneError || size != 1) {
338 return s, nil
339 }
340 }
341 }
Russ Cox73aadff2009-04-13 13:27:39 -0700342
Rob Pikeeab42612012-03-06 15:25:42 +1100343 var runeTmp [utf8.UTFMax]byte
344 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
Russ Cox3619f1e2009-05-11 14:10:34 -0700345 for len(s) > 0 {
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800346 c, multibyte, ss, err := UnquoteChar(s, quote)
Russ Cox7732d802009-11-01 09:25:55 -0800347 if err != nil {
Robert Griesemer40621d52009-11-09 12:07:39 -0800348 return "", err
Russ Cox73aadff2009-04-13 13:27:39 -0700349 }
Robert Griesemerd65a5cc2009-12-15 15:40:16 -0800350 s = ss
Russ Cox7732d802009-11-01 09:25:55 -0800351 if c < utf8.RuneSelf || !multibyte {
Rob Pikeeab42612012-03-06 15:25:42 +1100352 buf = append(buf, byte(c))
Russ Cox0d779472009-06-23 16:44:01 -0700353 } else {
Rob Pikeeab42612012-03-06 15:25:42 +1100354 n := utf8.EncodeRune(runeTmp[:], c)
355 buf = append(buf, runeTmp[:n]...)
Russ Cox0d779472009-06-23 16:44:01 -0700356 }
Russ Cox3619f1e2009-05-11 14:10:34 -0700357 if quote == '\'' && len(s) != 0 {
358 // single-quoted must be single character
Russ Coxc1178aa2011-10-27 19:46:31 -0700359 return "", ErrSyntax
Russ Cox3619f1e2009-05-11 14:10:34 -0700360 }
Russ Cox73aadff2009-04-13 13:27:39 -0700361 }
Rob Pikeeab42612012-03-06 15:25:42 +1100362 return string(buf), nil
Russ Cox73aadff2009-04-13 13:27:39 -0700363}
Russ Coxe9d5a642012-03-06 00:36:12 -0500364
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}

// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The result is only meaningful if a is sorted in increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}

// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The result is only meaningful if a is sorted in increasing order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}

Rob Pikef91326b2012-03-07 13:50:31 +1100405// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
406// to give the same answer. It allows this package not to depend on unicode,
407// and therefore not pull in all the Unicode tables. If the linker were better
408// at tossing unused tables, we could get rid of this implementation.
409// That would be nice.
410
411// IsPrint reports whether the rune is defined as printable by Go, with
412// the same definition as unicode.IsPrint: letters, numbers, punctuation,
413// symbols and ASCII space.
414func IsPrint(r rune) bool {
415 // Fast check for Latin-1
416 if r <= 0xFF {
417 if 0x20 <= r && r <= 0x7E {
418 // All the ASCII is printable from space through DEL-1.
419 return true
420 }
421 if 0xA1 <= r && r <= 0xFF {
422 // Similarly for ¡ through ÿ...
423 return r != 0xAD // ...except for the bizarre soft hyphen.
424 }
425 return false
426 }
427
Russ Coxe9d5a642012-03-06 00:36:12 -0500428 // Same algorithm, either on uint16 or uint32 value.
429 // First, find first i such that isPrint[i] >= x.
430 // This is the index of either the start or end of a pair that might span x.
431 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
432 // If we find x in a range, make sure x is not in isNotPrint list.
433
434 if 0 <= r && r < 1<<16 {
435 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
436 i := bsearch16(isPrint, rr)
437 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
438 return false
439 }
440 j := bsearch16(isNotPrint, rr)
441 return j >= len(isNotPrint) || isNotPrint[j] != rr
442 }
443
444 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
445 i := bsearch32(isPrint, rr)
446 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
447 return false
448 }
Rob Pikef91326b2012-03-07 13:50:31 +1100449 if r >= 0x20000 {
450 return true
451 }
452 r -= 0x10000
453 j := bsearch16(isNotPrint, uint16(r))
454 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
Russ Coxe9d5a642012-03-06 00:36:12 -0500455}