blob: 83650e605d16b57a3a81fb6c7df1500c2a46fbd0 [file] [log] [blame]
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package fmt
6
7import (
8 "bytes"
9 "io"
10 "math"
11 "os"
12 "reflect"
13 "strconv"
14 "strings"
15 "unicode"
16 "utf8"
17)
18
19// runeUnreader is the interface to something that can unread runes.
20// If the object provided to Scan does not satisfy this interface,
21// a local buffer will be used to back up the input, but its contents
22// will be lost when Scan returns.
23type runeUnreader interface {
24 UnreadRune() os.Error
25}
26
27// ScanState represents the scanner state passed to custom scanners.
28// Scanners may do rune-at-a-time scanning or ask the ScanState
29// to discover the next space-delimited token.
30type ScanState interface {
31 // ReadRune reads the next rune (Unicode code point) from the input.
32 // If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
33 // return EOF after returning the first '\n' or when reading beyond
34 // the specified width.
35 ReadRune() (rune int, size int, err os.Error)
36 // UnreadRune causes the next call to ReadRune to return the same rune.
37 UnreadRune() os.Error
38 // Token skips space in the input if skipSpace is true, then returns the
39 // run of Unicode code points c satisfying f(c). If f is nil,
40 // !unicode.IsSpace(c) is used; that is, the token will hold non-space
41 // characters. Newlines are treated as space unless the scan operation
42 // is Scanln, Fscanln or Sscanln, in which case a newline is treated as
43 // EOF. The returned slice points to shared data that may be overwritten
44 // by the next call to Token, a call to a Scan function using the ScanState
45 // as input, or when the calling Scan method returns.
46 Token(skipSpace bool, f func(int) bool) (token []byte, err os.Error)
47 // Width returns the value of the width option and whether it has been set.
48 // The unit is Unicode code points.
49 Width() (wid int, ok bool)
50 // Because ReadRune is implemented by the interface, Read should never be
51 // called by the scanning routines and a valid implementation of
52 // ScanState may choose always to return an error from Read.
53 Read(buf []byte) (n int, err os.Error)
54}
55
56// Scanner is implemented by any value that has a Scan method, which scans
57// the input for the representation of a value and stores the result in the
58// receiver, which must be a pointer to be useful. The Scan method is called
59// for any argument to Scan, Scanf, or Scanln that implements it.
60type Scanner interface {
61 Scan(state ScanState, verb int) os.Error
62}
63
64// Scan scans text read from standard input, storing successive
65// space-separated values into successive arguments. Newlines count
66// as space. It returns the number of items successfully scanned.
67// If that is less than the number of arguments, err will report why.
68func Scan(a ...interface{}) (n int, err os.Error) {
69 return Fscan(os.Stdin, a...)
70}
71
72// Scanln is similar to Scan, but stops scanning at a newline and
73// after the final item there must be a newline or EOF.
74func Scanln(a ...interface{}) (n int, err os.Error) {
75 return Fscanln(os.Stdin, a...)
76}
77
78// Scanf scans text read from standard input, storing successive
79// space-separated values into successive arguments as determined by
80// the format. It returns the number of items successfully scanned.
81func Scanf(format string, a ...interface{}) (n int, err os.Error) {
82 return Fscanf(os.Stdin, format, a...)
83}
84
85// Sscan scans the argument string, storing successive space-separated
86// values into successive arguments. Newlines count as space. It
87// returns the number of items successfully scanned. If that is less
88// than the number of arguments, err will report why.
89func Sscan(str string, a ...interface{}) (n int, err os.Error) {
90 return Fscan(strings.NewReader(str), a...)
91}
92
93// Sscanln is similar to Sscan, but stops scanning at a newline and
94// after the final item there must be a newline or EOF.
95func Sscanln(str string, a ...interface{}) (n int, err os.Error) {
96 return Fscanln(strings.NewReader(str), a...)
97}
98
99// Sscanf scans the argument string, storing successive space-separated
100// values into successive arguments as determined by the format. It
101// returns the number of items successfully parsed.
102func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) {
103 return Fscanf(strings.NewReader(str), format, a...)
104}
105
106// Fscan scans text read from r, storing successive space-separated
107// values into successive arguments. Newlines count as space. It
108// returns the number of items successfully scanned. If that is less
109// than the number of arguments, err will report why.
110func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) {
111 s, old := newScanState(r, true, false)
112 n, err = s.doScan(a)
113 s.free(old)
114 return
115}
116
117// Fscanln is similar to Fscan, but stops scanning at a newline and
118// after the final item there must be a newline or EOF.
119func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) {
120 s, old := newScanState(r, false, true)
121 n, err = s.doScan(a)
122 s.free(old)
123 return
124}
125
126// Fscanf scans text read from r, storing successive space-separated
127// values into successive arguments as determined by the format. It
128// returns the number of items successfully parsed.
129func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) {
130 s, old := newScanState(r, false, false)
131 n, err = s.doScanf(format, a)
132 s.free(old)
133 return
134}
135
136// scanError represents an error generated by the scanning software.
137// It's used as a unique signature to identify such errors when recovering.
138type scanError struct {
139 err os.Error
140}
141
// eof is the value getRune returns in place of a rune at end of input.
const eof = -1
143
144// ss is the internal implementation of ScanState.
145type ss struct {
146 rr io.RuneReader // where to read input
147 buf bytes.Buffer // token accumulator
148 peekRune int // one-rune lookahead
149 prevRune int // last rune returned by ReadRune
150 count int // runes consumed so far.
151 atEOF bool // already read EOF
152 ssave
153}
154
// ssave collects the fields of ss that have to be saved before a
// recursive scan and restored after it.
type ssave struct {
	validSave  bool // true if this value is or was part of a real ss
	nlIsEnd    bool // a newline terminates the scan
	nlIsSpace  bool // a newline counts as white space
	fieldLimit int  // largest ss.count allowed for this field; fieldLimit <= limit
	limit      int  // largest ss.count allowed
	maxWid     int  // width of this field
}
165
166// The Read method is only in ScanState so that ScanState
167// satisfies io.Reader. It will never be called when used as
168// intended, so there is no need to make it actually work.
169func (s *ss) Read(buf []byte) (n int, err os.Error) {
170 return 0, os.ErrorString("ScanState's Read should not be called. Use ReadRune")
171}
172
173func (s *ss) ReadRune() (rune int, size int, err os.Error) {
174 if s.peekRune >= 0 {
175 s.count++
176 rune = s.peekRune
177 size = utf8.RuneLen(rune)
178 s.prevRune = rune
179 s.peekRune = -1
180 return
181 }
182 if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.fieldLimit {
183 err = os.EOF
184 return
185 }
186
187 rune, size, err = s.rr.ReadRune()
188 if err == nil {
189 s.count++
190 s.prevRune = rune
191 } else if err == os.EOF {
192 s.atEOF = true
193 }
194 return
195}
196
197func (s *ss) Width() (wid int, ok bool) {
198 if s.maxWid == hugeWid {
199 return 0, false
200 }
201 return s.maxWid, true
202}
203
204// The public method returns an error; this private one panics.
205// If getRune reaches EOF, the return value is EOF (-1).
206func (s *ss) getRune() (rune int) {
207 rune, _, err := s.ReadRune()
208 if err != nil {
209 if err == os.EOF {
210 return eof
211 }
212 s.error(err)
213 }
214 return
215}
216
217// mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
218// It is called in cases such as string scanning where an EOF is a
219// syntax error.
220func (s *ss) mustReadRune() (rune int) {
221 rune = s.getRune()
222 if rune == eof {
223 s.error(io.ErrUnexpectedEOF)
224 }
225 return
226}
227
228func (s *ss) UnreadRune() os.Error {
229 if u, ok := s.rr.(runeUnreader); ok {
230 u.UnreadRune()
231 } else {
232 s.peekRune = s.prevRune
233 }
234 s.count--
235 return nil
236}
237
238func (s *ss) error(err os.Error) {
239 panic(scanError{err})
240}
241
242func (s *ss) errorString(err string) {
243 panic(scanError{os.ErrorString(err)})
244}
245
246func (s *ss) Token(skipSpace bool, f func(int) bool) (tok []byte, err os.Error) {
247 defer func() {
248 if e := recover(); e != nil {
249 if se, ok := e.(scanError); ok {
250 err = se.err
251 } else {
252 panic(e)
253 }
254 }
255 }()
256 if f == nil {
257 f = notSpace
258 }
259 s.buf.Reset()
260 tok = s.token(skipSpace, f)
261 return
262}
263
264// notSpace is the default scanning function used in Token.
265func notSpace(r int) bool {
266 return !unicode.IsSpace(r)
267}
268
// readRune adapts an io.Reader so that UTF-8 encoded code points can
// be read from it. The scanner wraps its input in a readRune when the
// Reader it was given does not already implement io.RuneReader.
type readRune struct {
	reader  io.Reader
	buf     [utf8.UTFMax]byte // scratch space used only inside ReadRune
	pending int               // count of bytes held in pendBuf; only >0 for bad UTF-8
	pendBuf [utf8.UTFMax]byte // bytes left over for later reads
}
278
279// readByte returns the next byte from the input, which may be
280// left over from a previous read if the UTF-8 was ill-formed.
281func (r *readRune) readByte() (b byte, err os.Error) {
282 if r.pending > 0 {
283 b = r.pendBuf[0]
284 copy(r.pendBuf[0:], r.pendBuf[1:])
285 r.pending--
286 return
287 }
288 _, err = r.reader.Read(r.pendBuf[0:1])
289 return r.pendBuf[0], err
290}
291
292// unread saves the bytes for the next read.
293func (r *readRune) unread(buf []byte) {
294 copy(r.pendBuf[r.pending:], buf)
295 r.pending += len(buf)
296}
297
298// ReadRune returns the next UTF-8 encoded code point from the
299// io.Reader inside r.
300func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
301 r.buf[0], err = r.readByte()
302 if err != nil {
303 return 0, 0, err
304 }
305 if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
306 rune = int(r.buf[0])
307 return
308 }
309 var n int
310 for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
311 r.buf[n], err = r.readByte()
312 if err != nil {
313 if err == os.EOF {
314 err = nil
315 break
316 }
317 return
318 }
319 }
320 rune, size = utf8.DecodeRune(r.buf[0:n])
321 if size < n { // an error
322 r.unread(r.buf[size:n])
323 }
324 return
325}
326
Russ Coxfb94eb12011-04-08 12:27:24 -0400327var ssFree = newCache(func() interface{} { return new(ss) })
328
329// Allocate a new ss struct or grab a cached one.
330func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
331 // If the reader is a *ss, then we've got a recursive
332 // call to Scan, so re-use the scan state.
333 s, ok := r.(*ss)
334 if ok {
335 old = s.ssave
336 s.limit = s.fieldLimit
337 s.nlIsEnd = nlIsEnd || s.nlIsEnd
338 s.nlIsSpace = nlIsSpace
339 return
340 }
341
342 s = ssFree.get().(*ss)
343 if rr, ok := r.(io.RuneReader); ok {
344 s.rr = rr
345 } else {
346 s.rr = &readRune{reader: r}
347 }
348 s.nlIsSpace = nlIsSpace
349 s.nlIsEnd = nlIsEnd
350 s.prevRune = -1
351 s.peekRune = -1
352 s.atEOF = false
353 s.limit = hugeWid
354 s.fieldLimit = hugeWid
355 s.maxWid = hugeWid
356 s.validSave = true
357 return
358}
359
360// Save used ss structs in ssFree; avoid an allocation per invocation.
361func (s *ss) free(old ssave) {
362 // If it was used recursively, just restore the old state.
363 if old.validSave {
364 s.ssave = old
365 return
366 }
367 // Don't hold on to ss structs with large buffers.
368 if cap(s.buf.Bytes()) > 1024 {
369 return
370 }
371 s.buf.Reset()
372 s.rr = nil
373 ssFree.put(s)
374}
375
376// skipSpace skips spaces and maybe newlines.
377func (s *ss) skipSpace(stopAtNewline bool) {
378 for {
379 rune := s.getRune()
380 if rune == eof {
381 return
382 }
383 if rune == '\n' {
384 if stopAtNewline {
385 break
386 }
387 if s.nlIsSpace {
388 continue
389 }
390 s.errorString("unexpected newline")
391 return
392 }
393 if !unicode.IsSpace(rune) {
394 s.UnreadRune()
395 break
396 }
397 }
398}
399
Russ Coxfb94eb12011-04-08 12:27:24 -0400400// token returns the next space-delimited string from the input. It
401// skips white space. For Scanln, it stops at newlines. For Scan,
402// newlines are treated as spaces.
403func (s *ss) token(skipSpace bool, f func(int) bool) []byte {
404 if skipSpace {
405 s.skipSpace(false)
406 }
407 // read until white space or newline
408 for {
409 rune := s.getRune()
410 if rune == eof {
411 break
412 }
413 if !f(rune) {
414 s.UnreadRune()
415 break
416 }
417 s.buf.WriteRune(rune)
418 }
419 return s.buf.Bytes()
420}
421
422// typeError indicates that the type of the operand did not match the format
423func (s *ss) typeError(field interface{}, expected string) {
424 s.errorString("expected field of type pointer to " + expected + "; found " + reflect.Typeof(field).String())
425}
426
427var complexError = os.ErrorString("syntax error scanning complex number")
428var boolError = os.ErrorString("syntax error scanning boolean")
429
430// consume reads the next rune in the input and reports whether it is in the ok string.
431// If accept is true, it puts the character into the input token.
432func (s *ss) consume(ok string, accept bool) bool {
433 rune := s.getRune()
434 if rune == eof {
435 return false
436 }
437 if strings.IndexRune(ok, rune) >= 0 {
438 if accept {
439 s.buf.WriteRune(rune)
440 }
441 return true
442 }
443 if rune != eof && accept {
444 s.UnreadRune()
445 }
446 return false
447}
448
449// peek reports whether the next character is in the ok string, without consuming it.
450func (s *ss) peek(ok string) bool {
451 rune := s.getRune()
452 if rune != eof {
453 s.UnreadRune()
454 }
455 return strings.IndexRune(ok, rune) >= 0
456}
457
458// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
459// buffer and returns true. Otherwise it return false.
460func (s *ss) accept(ok string) bool {
461 return s.consume(ok, true)
462}
463
464// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
465func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
466 for _, v := range okVerbs {
467 if v == verb {
468 return true
469 }
470 }
471 s.errorString("bad verb %" + string(verb) + " for " + typ)
472 return false
473}
474
475// scanBool returns the value of the boolean represented by the next token.
476func (s *ss) scanBool(verb int) bool {
477 if !s.okVerb(verb, "tv", "boolean") {
478 return false
479 }
480 // Syntax-checking a boolean is annoying. We're not fastidious about case.
481 switch s.mustReadRune() {
482 case '0':
483 return false
484 case '1':
485 return true
486 case 't', 'T':
487 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
488 s.error(boolError)
489 }
490 return true
491 case 'f', 'F':
492 if s.accept("aL") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
493 s.error(boolError)
494 }
495 return false
496 }
497 return false
498}
499
// Character sets used when scanning numbers. Each digit set extends
// the one before it.
const (
	binaryDigits      = "01"
	octalDigits       = binaryDigits + "234567"
	decimalDigits     = octalDigits + "89"
	hexadecimalDigits = decimalDigits + "aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp"
)
510
511// getBase returns the numeric base represented by the verb and its digit string.
512func (s *ss) getBase(verb int) (base int, digits string) {
513 s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
514 base = 10
515 digits = decimalDigits
516 switch verb {
517 case 'b':
518 base = 2
519 digits = binaryDigits
520 case 'o':
521 base = 8
522 digits = octalDigits
523 case 'x', 'X', 'U':
524 base = 16
525 digits = hexadecimalDigits
526 }
527 return
528}
529
530// scanNumber returns the numerical string with specified digits starting here.
531func (s *ss) scanNumber(digits string, haveDigits bool) string {
532 if !haveDigits && !s.accept(digits) {
533 s.errorString("expected integer")
534 }
535 for s.accept(digits) {
536 }
537 return s.buf.String()
538}
539
540// scanRune returns the next rune value in the input.
541func (s *ss) scanRune(bitSize int) int64 {
542 rune := int64(s.mustReadRune())
543 n := uint(bitSize)
544 x := (rune << (64 - n)) >> (64 - n)
545 if x != rune {
546 s.errorString("overflow on character value " + string(rune))
547 }
548 return rune
549}
550
551// scanBasePrefix reports whether the integer begins with a 0 or 0x,
552// and returns the base, digit string, and whether a zero was found.
553// It is called only if the verb is %v.
554func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
555 if !s.peek("0") {
556 return 10, decimalDigits, false
557 }
558 s.accept("0")
559 found = true // We've put a digit into the token buffer.
560 // Special cases for '0' && '0x'
561 base, digits = 8, octalDigits
562 if s.peek("xX") {
563 s.consume("xX", false)
564 base, digits = 16, hexadecimalDigits
565 }
566 return
567}
568
569// scanInt returns the value of the integer represented by the next
570// token, checking for overflow. Any error is stored in s.err.
571func (s *ss) scanInt(verb int, bitSize int) int64 {
572 if verb == 'c' {
573 return s.scanRune(bitSize)
574 }
575 s.skipSpace(false)
576 base, digits := s.getBase(verb)
577 haveDigits := false
578 if verb == 'U' {
579 if !s.consume("U", false) || !s.consume("+", false) {
580 s.errorString("bad unicode format ")
581 }
582 } else {
583 s.accept(sign) // If there's a sign, it will be left in the token buffer.
584 if verb == 'v' {
585 base, digits, haveDigits = s.scanBasePrefix()
586 }
587 }
588 tok := s.scanNumber(digits, haveDigits)
589 i, err := strconv.Btoi64(tok, base)
590 if err != nil {
591 s.error(err)
592 }
593 n := uint(bitSize)
594 x := (i << (64 - n)) >> (64 - n)
595 if x != i {
596 s.errorString("integer overflow on token " + tok)
597 }
598 return i
599}
600
601// scanUint returns the value of the unsigned integer represented
602// by the next token, checking for overflow. Any error is stored in s.err.
603func (s *ss) scanUint(verb int, bitSize int) uint64 {
604 if verb == 'c' {
605 return uint64(s.scanRune(bitSize))
606 }
607 s.skipSpace(false)
608 base, digits := s.getBase(verb)
609 haveDigits := false
610 if verb == 'U' {
611 if !s.consume("U", false) || !s.consume("+", false) {
612 s.errorString("bad unicode format ")
613 }
614 } else if verb == 'v' {
615 base, digits, haveDigits = s.scanBasePrefix()
616 }
617 tok := s.scanNumber(digits, haveDigits)
618 i, err := strconv.Btoui64(tok, base)
619 if err != nil {
620 s.error(err)
621 }
622 n := uint(bitSize)
623 x := (i << (64 - n)) >> (64 - n)
624 if x != i {
625 s.errorString("unsigned integer overflow on token " + tok)
626 }
627 return i
628}
629
630// floatToken returns the floating-point number starting here, no longer than swid
631// if the width is specified. It's not rigorous about syntax because it doesn't check that
632// we have at least some digits, but Atof will do that.
633func (s *ss) floatToken() string {
634 s.buf.Reset()
635 // NaN?
636 if s.accept("nN") && s.accept("aA") && s.accept("nN") {
637 return s.buf.String()
638 }
639 // leading sign?
640 s.accept(sign)
641 // Inf?
642 if s.accept("iI") && s.accept("nN") && s.accept("fF") {
643 return s.buf.String()
644 }
645 // digits?
646 for s.accept(decimalDigits) {
647 }
648 // decimal point?
649 if s.accept(period) {
650 // fraction?
651 for s.accept(decimalDigits) {
652 }
653 }
654 // exponent?
655 if s.accept(exponent) {
656 // leading sign?
657 s.accept(sign)
658 // digits?
659 for s.accept(decimalDigits) {
660 }
661 }
662 return s.buf.String()
663}
664
665// complexTokens returns the real and imaginary parts of the complex number starting here.
666// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
667// number and there are no spaces within.
668func (s *ss) complexTokens() (real, imag string) {
669 // TODO: accept N and Ni independently?
670 parens := s.accept("(")
671 real = s.floatToken()
672 s.buf.Reset()
673 // Must now have a sign.
674 if !s.accept("+-") {
675 s.error(complexError)
676 }
677 // Sign is now in buffer
678 imagSign := s.buf.String()
679 imag = s.floatToken()
680 if !s.accept("i") {
681 s.error(complexError)
682 }
683 if parens && !s.accept(")") {
684 s.error(complexError)
685 }
686 return real, imagSign + imag
687}
688
689// convertFloat converts the string to a float64value.
690func (s *ss) convertFloat(str string, n int) float64 {
691 if p := strings.Index(str, "p"); p >= 0 {
692 // Atof doesn't handle power-of-2 exponents,
693 // but they're easy to evaluate.
694 f, err := strconv.AtofN(str[:p], n)
695 if err != nil {
696 // Put full string into error.
697 if e, ok := err.(*strconv.NumError); ok {
698 e.Num = str
699 }
700 s.error(err)
701 }
702 n, err := strconv.Atoi(str[p+1:])
703 if err != nil {
704 // Put full string into error.
705 if e, ok := err.(*strconv.NumError); ok {
706 e.Num = str
707 }
708 s.error(err)
709 }
710 return math.Ldexp(f, n)
711 }
712 f, err := strconv.AtofN(str, n)
713 if err != nil {
714 s.error(err)
715 }
716 return f
717}
718
719// convertComplex converts the next token to a complex128 value.
720// The atof argument is a type-specific reader for the underlying type.
721// If we're reading complex64, atof will parse float32s and convert them
722// to float64's to avoid reproducing this code for each complex type.
723func (s *ss) scanComplex(verb int, n int) complex128 {
724 if !s.okVerb(verb, floatVerbs, "complex") {
725 return 0
726 }
727 s.skipSpace(false)
728 sreal, simag := s.complexTokens()
729 real := s.convertFloat(sreal, n/2)
730 imag := s.convertFloat(simag, n/2)
731 return complex(real, imag)
732}
733
734// convertString returns the string represented by the next input characters.
735// The format of the input is determined by the verb.
736func (s *ss) convertString(verb int) (str string) {
737 if !s.okVerb(verb, "svqx", "string") {
738 return ""
739 }
740 s.skipSpace(false)
741 switch verb {
742 case 'q':
743 str = s.quotedString()
744 case 'x':
745 str = s.hexString()
746 default:
747 str = string(s.token(true, notSpace)) // %s and %v just return the next word
748 }
749 // Empty strings other than with %q are not OK.
750 if len(str) == 0 && verb != 'q' && s.maxWid > 0 {
751 s.errorString("Scan: no data for string")
752 }
753 return
754}
755
756// quotedString returns the double- or back-quoted string represented by the next input characters.
757func (s *ss) quotedString() string {
758 quote := s.mustReadRune()
759 switch quote {
760 case '`':
761 // Back-quoted: Anything goes until EOF or back quote.
762 for {
763 rune := s.mustReadRune()
764 if rune == quote {
765 break
766 }
767 s.buf.WriteRune(rune)
768 }
769 return s.buf.String()
770 case '"':
771 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
772 s.buf.WriteRune(quote)
773 for {
774 rune := s.mustReadRune()
775 s.buf.WriteRune(rune)
776 if rune == '\\' {
777 // In a legal backslash escape, no matter how long, only the character
778 // immediately after the escape can itself be a backslash or quote.
779 // Thus we only need to protect the first character after the backslash.
780 rune := s.mustReadRune()
781 s.buf.WriteRune(rune)
782 } else if rune == '"' {
783 break
784 }
785 }
786 result, err := strconv.Unquote(s.buf.String())
787 if err != nil {
788 s.error(err)
789 }
790 return result
791 default:
792 s.errorString("expected quoted string")
793 }
794 return ""
795}
796
797// hexDigit returns the value of the hexadecimal digit
798func (s *ss) hexDigit(digit int) int {
799 switch digit {
800 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
801 return digit - '0'
802 case 'a', 'b', 'c', 'd', 'e', 'f':
803 return 10 + digit - 'a'
804 case 'A', 'B', 'C', 'D', 'E', 'F':
805 return 10 + digit - 'A'
806 }
807 s.errorString("Scan: illegal hex digit")
808 return 0
809}
810
811// hexByte returns the next hex-encoded (two-character) byte from the input.
812// There must be either two hexadecimal digits or a space character in the input.
813func (s *ss) hexByte() (b byte, ok bool) {
814 rune1 := s.getRune()
815 if rune1 == eof {
816 return
817 }
818 if unicode.IsSpace(rune1) {
819 s.UnreadRune()
820 return
821 }
822 rune2 := s.mustReadRune()
823 return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
824}
825
826// hexString returns the space-delimited hexpair-encoded string.
827func (s *ss) hexString() string {
828 for {
829 b, ok := s.hexByte()
830 if !ok {
831 break
832 }
833 s.buf.WriteByte(b)
834 }
835 if s.buf.Len() == 0 {
836 s.errorString("Scan: no hex data for %x string")
837 return ""
838 }
839 return s.buf.String()
840}
841
const (
	// floatVerbs lists the verbs accepted for floating-point and complex values.
	floatVerbs = "beEfFgGv"

	// hugeWid is the width used when no field width has been set.
	hugeWid = 1 << 30
)
845
846// scanOne scans a single value, deriving the scanner from the type of the argument.
847func (s *ss) scanOne(verb int, field interface{}) {
848 s.buf.Reset()
849 var err os.Error
850 // If the parameter has its own Scan method, use that.
851 if v, ok := field.(Scanner); ok {
852 err = v.Scan(s, verb)
853 if err != nil {
854 if err == os.EOF {
855 err = io.ErrUnexpectedEOF
856 }
857 s.error(err)
858 }
859 return
860 }
861 switch v := field.(type) {
862 case *bool:
863 *v = s.scanBool(verb)
864 case *complex64:
865 *v = complex64(s.scanComplex(verb, 64))
866 case *complex128:
867 *v = s.scanComplex(verb, 128)
868 case *int:
869 *v = int(s.scanInt(verb, intBits))
870 case *int8:
871 *v = int8(s.scanInt(verb, 8))
872 case *int16:
873 *v = int16(s.scanInt(verb, 16))
874 case *int32:
875 *v = int32(s.scanInt(verb, 32))
876 case *int64:
877 *v = s.scanInt(verb, 64)
878 case *uint:
879 *v = uint(s.scanUint(verb, intBits))
880 case *uint8:
881 *v = uint8(s.scanUint(verb, 8))
882 case *uint16:
883 *v = uint16(s.scanUint(verb, 16))
884 case *uint32:
885 *v = uint32(s.scanUint(verb, 32))
886 case *uint64:
887 *v = s.scanUint(verb, 64)
888 case *uintptr:
889 *v = uintptr(s.scanUint(verb, uintptrBits))
890 // Floats are tricky because you want to scan in the precision of the result, not
891 // scan in high precision and convert, in order to preserve the correct error condition.
892 case *float32:
893 if s.okVerb(verb, floatVerbs, "float32") {
894 s.skipSpace(false)
895 *v = float32(s.convertFloat(s.floatToken(), 32))
896 }
897 case *float64:
898 if s.okVerb(verb, floatVerbs, "float64") {
899 s.skipSpace(false)
900 *v = s.convertFloat(s.floatToken(), 64)
901 }
902 case *string:
903 *v = s.convertString(verb)
904 case *[]byte:
905 // We scan to string and convert so we get a copy of the data.
906 // If we scanned to bytes, the slice would point at the buffer.
907 *v = []byte(s.convertString(verb))
908 default:
909 val := reflect.NewValue(v)
910 ptr, ok := val.(*reflect.PtrValue)
911 if !ok {
912 s.errorString("Scan: type not a pointer: " + val.Type().String())
913 return
914 }
915 switch v := ptr.Elem().(type) {
916 case *reflect.BoolValue:
917 v.Set(s.scanBool(verb))
918 case *reflect.IntValue:
919 v.Set(s.scanInt(verb, v.Type().Bits()))
920 case *reflect.UintValue:
921 v.Set(s.scanUint(verb, v.Type().Bits()))
922 case *reflect.StringValue:
923 v.Set(s.convertString(verb))
924 case *reflect.SliceValue:
925 // For now, can only handle (renamed) []byte.
926 typ := v.Type().(*reflect.SliceType)
927 if typ.Elem().Kind() != reflect.Uint8 {
928 goto CantHandle
929 }
930 str := s.convertString(verb)
931 v.Set(reflect.MakeSlice(typ, len(str), len(str)))
932 for i := 0; i < len(str); i++ {
933 v.Elem(i).(*reflect.UintValue).Set(uint64(str[i]))
934 }
935 case *reflect.FloatValue:
936 s.skipSpace(false)
937 v.Set(s.convertFloat(s.floatToken(), v.Type().Bits()))
938 case *reflect.ComplexValue:
939 v.Set(s.scanComplex(verb, v.Type().Bits()))
940 default:
941 CantHandle:
942 s.errorString("Scan: can't handle type: " + val.Type().String())
943 }
944 }
945}
946
947// errorHandler turns local panics into error returns. EOFs are benign.
948func errorHandler(errp *os.Error) {
949 if e := recover(); e != nil {
950 if se, ok := e.(scanError); ok { // catch local error
951 if se.err != os.EOF {
952 *errp = se.err
953 }
954 } else {
955 panic(e)
956 }
957 }
958}
959
960// doScan does the real work for scanning without a format string.
961func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
962 defer errorHandler(&err)
963 for _, field := range a {
964 s.scanOne('v', field)
965 numProcessed++
966 }
967 // Check for newline if required.
968 if !s.nlIsSpace {
969 for {
970 rune := s.getRune()
971 if rune == '\n' || rune == eof {
972 break
973 }
974 if !unicode.IsSpace(rune) {
975 s.errorString("Scan: expected newline")
976 break
977 }
978 }
979 }
980 return
981}
982
983// advance determines whether the next characters in the input match
984// those of the format. It returns the number of bytes (sic) consumed
985// in the format. Newlines included, all runs of space characters in
986// either input or format behave as a single space. This routine also
987// handles the %% case. If the return value is zero, either format
988// starts with a % (with no following %) or the input is empty.
989// If it is negative, the input did not match the string.
990func (s *ss) advance(format string) (i int) {
991 for i < len(format) {
992 fmtc, w := utf8.DecodeRuneInString(format[i:])
993 if fmtc == '%' {
994 // %% acts like a real percent
995 nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
996 if nextc != '%' {
997 return
998 }
999 i += w // skip the first %
1000 }
1001 sawSpace := false
1002 for unicode.IsSpace(fmtc) && i < len(format) {
1003 sawSpace = true
1004 i += w
1005 fmtc, w = utf8.DecodeRuneInString(format[i:])
1006 }
1007 if sawSpace {
1008 // There was space in the format, so there should be space (EOF)
1009 // in the input.
1010 inputc := s.getRune()
1011 if inputc == eof {
1012 return
1013 }
1014 if !unicode.IsSpace(inputc) {
1015 // Space in format but not in input: error
1016 s.errorString("expected space in input to match format")
1017 }
1018 s.skipSpace(true)
1019 continue
1020 }
1021 inputc := s.mustReadRune()
1022 if fmtc != inputc {
1023 s.UnreadRune()
1024 return -1
1025 }
1026 i += w
1027 }
1028 return
1029}
1030
1031// doScanf does the real work when scanning with a format string.
1032// At the moment, it handles only pointers to basic types.
1033func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.Error) {
1034 defer errorHandler(&err)
1035 end := len(format) - 1
1036 // We process one item per non-trivial format
1037 for i := 0; i <= end; {
1038 w := s.advance(format[i:])
1039 if w > 0 {
1040 i += w
1041 continue
1042 }
1043 // Either we failed to advance, we have a percent character, or we ran out of input.
1044 if format[i] != '%' {
1045 // Can't advance format. Why not?
1046 if w < 0 {
1047 s.errorString("input does not match format")
1048 }
1049 // Otherwise at EOF; "too many operands" error handled below
1050 break
1051 }
1052 i++ // % is one byte
1053
1054 // do we have 20 (width)?
1055 var widPresent bool
1056 s.maxWid, widPresent, i = parsenum(format, i, end)
1057 if !widPresent {
1058 s.maxWid = hugeWid
1059 }
1060 s.fieldLimit = s.limit
1061 if f := s.count + s.maxWid; f < s.fieldLimit {
1062 s.fieldLimit = f
1063 }
1064
1065 c, w := utf8.DecodeRuneInString(format[i:])
1066 i += w
1067
1068 if numProcessed >= len(a) { // out of operands
1069 s.errorString("too few operands for format %" + format[i-w:])
1070 break
1071 }
1072 field := a[numProcessed]
1073
1074 s.scanOne(c, field)
1075 numProcessed++
1076 s.fieldLimit = s.limit
1077 }
1078 if numProcessed < len(a) {
1079 s.errorString("too many operands")
1080 }
1081 return
1082}