message: factor out format string parsing
We need the parser outside of message.
The plan is to move it to an internal package.
Change-Id: I89e8ee940a940634d99548b01583a61de4c60461
Reviewed-on: https://go-review.googlesource.com/79235
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/message/format.go b/message/format.go
index d3340d1..d5534ae 100644
--- a/message/format.go
+++ b/message/format.go
@@ -22,6 +22,8 @@
// flags placed in a separate struct for easy clearing.
type fmtFlags struct {
+ verb rune
+
widPresent bool
precPresent bool
minus bool
@@ -35,8 +37,48 @@
// different, flagless formats set at the top level.
plusV bool
sharpV bool
+
+ wid int // width
+ prec int // precision
+
+ // retain arguments across calls.
+ args []interface{}
+ // retain current argument number across calls
+ argNum int
+
+ // reordered records whether the format string used argument reordering.
+ reordered bool
+ // goodArgNum records whether the most recent reordering directive was valid.
+ goodArgNum bool
+
+ // position info
+ format string
+ startPos int
+ endPos int
+ state state
}
+func (p *fmtFlags) Text() string { return p.format[p.startPos:p.endPos] }
+
+func (p *fmtFlags) init(format string) {
+ p.format = format
+ p.startPos = 0
+ p.endPos = 0
+ p.argNum = 0
+}
+
+type state int
+
+const (
+ text state = iota
+ substitution
+ badWidth
+ badPrec
+ noVerb
+ badArgNum
+ missingArg
+)
+
// A formatInfo is the raw formatter used by Printf etc.
// It prints into a buffer that must be set up separately.
type formatInfo struct {
@@ -44,16 +86,22 @@
fmtFlags
- wid int // width
- prec int // precision
-
// intbuf is large enough to store %b of an int64 with a sign and
// avoids padding at the end of the struct on 32 bit architectures.
intbuf [68]byte
}
-func (f *formatInfo) clearflags() {
- f.fmtFlags = fmtFlags{}
+func (f *fmtFlags) clearflags() {
+ f.widPresent = false
+ f.precPresent = false
+ f.minus = false
+ f.plus = false
+ f.sharp = false
+ f.space = false
+ f.zero = false
+
+ f.plusV = false
+ f.sharpV = false
}
func (f *formatInfo) init(buf *bytes.Buffer) {
diff --git a/message/message.go b/message/message.go
index ba4f95a..db80e47 100644
--- a/message/message.go
+++ b/message/message.go
@@ -120,7 +120,7 @@
func lookupAndFormat(p *Printer, r Reference, a []interface{}) {
p.printer.reset()
- p.printer.args = a
+ p.printer.fmt.args = a
var id, msg string
switch v := r.(type) {
case string:
@@ -142,8 +142,8 @@
// Arg implements catmsg.Renderer.
func (p *printer) Arg(i int) interface{} { // TODO, also return "ok" bool
i--
- if uint(i) < uint(len(p.args)) {
- return p.args[i]
+ if uint(i) < uint(len(p.fmt.args)) {
+ return p.fmt.args[i]
}
return nil
}
diff --git a/message/print.go b/message/print.go
index 5819cba..4d80d51 100644
--- a/message/print.go
+++ b/message/print.go
@@ -48,10 +48,6 @@
// buffer for accumulating output.
bytes.Buffer
- // retain arguments across calls.
- args []interface{}
- // retain current argument number across calls
- argNum int
// arg holds the current item, as an interface{}.
arg interface{}
// value is used instead of arg for reflect values.
@@ -60,10 +56,6 @@
// fmt is used to format basic items such as integers or strings.
fmt formatInfo
- // reordered records whether the format string used argument reordering.
- reordered bool
- // goodArgNum records whether the most recent reordering directive was valid.
- goodArgNum bool
// panicking is set by catchPanic to avoid infinite panic, recover, panic, ... recursion.
panicking bool
// erroring is set when printing an error string to guard against calling handleMethods.
@@ -75,8 +67,9 @@
func (p *printer) reset() {
p.Buffer.Reset()
- p.argNum = 0
- p.reordered = false
+ p.fmt.argNum = 0
+ p.fmt.startPos = 0
+ p.fmt.reordered = false
p.panicking = false
p.erroring = false
p.fmt.init(&p.Buffer)
@@ -911,7 +904,7 @@
}
// intFromArg gets the argNumth element of a. On return, isInt reports whether the argument has integer type.
-func (p *printer) intFromArg() (num int, isInt bool) {
+func (p *fmtFlags) intFromArg() (num int, isInt bool) {
if p.argNum < len(p.args) {
arg := p.args[p.argNum]
num, isInt = arg.(int) // Almost always OK.
@@ -971,7 +964,7 @@
// updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
// argNum or the value of the bracketed integer that begins format[i:]. It also returns
// the new value of i, that is, the index of the next byte of the format to process.
-func (p *printer) updateArgNumber(format string, i int) (newi int, found bool) {
+func (p *fmtFlags) updateArgNumber(format string, i int) (newi int, found bool) {
if len(format) <= i || format[i] != '[' {
return i, false
}
@@ -997,151 +990,184 @@
p.WriteString(missingString)
}
-func (p *printer) doPrintf(format string) {
+func (p *fmtFlags) Scan() bool {
+ p.state = text
+ format := p.format
end := len(format)
+ if p.endPos >= end {
+ return false
+ }
afterIndex := false // previous item in format was an index like [3].
-formatLoop:
- for i := 0; i < end; {
- p.goodArgNum = true
- lasti := i
- for i < end && format[i] != '%' {
- i++
- }
- if i > lasti {
- p.WriteString(format[lasti:i])
- }
- if i >= end {
- // done processing format string
- break
- }
- // Process one verb
+ p.startPos = p.endPos
+ p.goodArgNum = true
+ i := p.startPos
+ for i < end && format[i] != '%' {
i++
+ }
+ if i > p.startPos {
+ p.endPos = i
+ return true
+ }
+ // Process one verb
+ i++
- // Do we have flags?
- p.fmt.clearflags()
- simpleFormat:
- for ; i < end; i++ {
- c := format[i]
- switch c {
- case '#':
- p.fmt.sharp = true
- case '0':
- p.fmt.zero = !p.fmt.minus // Only allow zero padding to the left.
- case '+':
- p.fmt.plus = true
- case '-':
- p.fmt.minus = true
- p.fmt.zero = false // Do not pad with zeros to the right.
- case ' ':
- p.fmt.space = true
- default:
- // Fast path for common case of ascii lower case simple verbs
- // without precision or width or argument indices.
- if 'a' <= c && c <= 'z' && p.argNum < len(p.args) {
- if c == 'v' {
- // Go syntax
- p.fmt.sharpV = p.fmt.sharp
- p.fmt.sharp = false
- // Struct-field syntax
- p.fmt.plusV = p.fmt.plus
- p.fmt.plus = false
- }
- p.printArg(p.Arg(p.argNum+1), rune(c))
- p.argNum++
- i++
- continue formatLoop
+ p.state = substitution
+
+ // Do we have flags?
+ p.clearflags()
+
+simpleFormat:
+ for ; i < end; i++ {
+ c := p.format[i]
+ switch c {
+ case '#':
+ p.sharp = true
+ case '0':
+ p.zero = !p.minus // Only allow zero padding to the left.
+ case '+':
+ p.plus = true
+ case '-':
+ p.minus = true
+ p.zero = false // Do not pad with zeros to the right.
+ case ' ':
+ p.space = true
+ default:
+ // Fast path for common case of ascii lower case simple verbs
+ // without precision or width or argument indices.
+ if 'a' <= c && c <= 'z' && p.argNum < len(p.args) {
+ if c == 'v' {
+ // Go syntax
+ p.sharpV = p.sharp
+ p.sharp = false
+ // Struct-field syntax
+ p.plusV = p.plus
+ p.plus = false
}
- // Format is more complex than simple flags and a verb or is malformed.
- break simpleFormat
+ p.verb = rune(c)
+ p.argNum++
+ p.endPos = i + 1
+ return true
}
+ // Format is more complex than simple flags and a verb or is malformed.
+ break simpleFormat
+ }
+ }
+
+ // Do we have an explicit argument index?
+ i, afterIndex = p.updateArgNumber(format, i)
+
+ // Do we have width?
+ if i < end && format[i] == '*' {
+ i++
+ p.wid, p.widPresent = p.intFromArg()
+
+ if !p.widPresent {
+ p.state = badWidth
}
- // Do we have an explicit argument index?
- i, afterIndex = p.updateArgNumber(format, i)
+ // We have a negative width, so take its value and ensure
+ // that the minus flag is set
+ if p.wid < 0 {
+ p.wid = -p.wid
+ p.minus = true
+ p.zero = false // Do not pad with zeros to the right.
+ }
+ afterIndex = false
+ } else {
+ p.wid, p.widPresent, i = parsenum(format, i, end)
+ if afterIndex && p.widPresent { // "%[3]2d"
+ p.goodArgNum = false
+ }
+ }
- // Do we have width?
+ // Do we have precision?
+ if i+1 < end && format[i] == '.' {
+ i++
+ if afterIndex { // "%[3].2d"
+ p.goodArgNum = false
+ }
+ i, afterIndex = p.updateArgNumber(format, i)
if i < end && format[i] == '*' {
i++
- p.fmt.wid, p.fmt.widPresent = p.intFromArg()
-
- if !p.fmt.widPresent {
- p.WriteString(badWidthString)
+ p.prec, p.precPresent = p.intFromArg()
+ // Negative precision arguments don't make sense
+ if p.prec < 0 {
+ p.prec = 0
+ p.precPresent = false
}
-
- // We have a negative width, so take its value and ensure
- // that the minus flag is set
- if p.fmt.wid < 0 {
- p.fmt.wid = -p.fmt.wid
- p.fmt.minus = true
- p.fmt.zero = false // Do not pad with zeros to the right.
+ if !p.precPresent {
+ p.state = badPrec
}
afterIndex = false
} else {
- p.fmt.wid, p.fmt.widPresent, i = parsenum(format, i, end)
- if afterIndex && p.fmt.widPresent { // "%[3]2d"
- p.goodArgNum = false
+ p.prec, p.precPresent, i = parsenum(format, i, end)
+ if !p.precPresent {
+ p.prec = 0
+ p.precPresent = true
}
}
+ }
- // Do we have precision?
- if i+1 < end && format[i] == '.' {
- i++
- if afterIndex { // "%[3].2d"
- p.goodArgNum = false
- }
- i, afterIndex = p.updateArgNumber(format, i)
- if i < end && format[i] == '*' {
- i++
- p.fmt.prec, p.fmt.precPresent = p.intFromArg()
- // Negative precision arguments don't make sense
- if p.fmt.prec < 0 {
- p.fmt.prec = 0
- p.fmt.precPresent = false
- }
- if !p.fmt.precPresent {
- p.WriteString(badPrecString)
- }
- afterIndex = false
- } else {
- p.fmt.prec, p.fmt.precPresent, i = parsenum(format, i, end)
- if !p.fmt.precPresent {
- p.fmt.prec = 0
- p.fmt.precPresent = true
- }
- }
- }
+ if !afterIndex {
+ i, afterIndex = p.updateArgNumber(format, i)
+ }
- if !afterIndex {
- i, afterIndex = p.updateArgNumber(format, i)
- }
+ if i >= end {
+ p.endPos = i
+ p.state = noVerb
+ return true
+ }
- if i >= end {
+ verb, w := utf8.DecodeRuneInString(format[i:])
+ p.endPos = i + w
+ p.verb = verb
+
+ switch {
+ case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
+ p.startPos = p.endPos - 1
+ p.state = text
+ case !p.goodArgNum:
+ p.state = badArgNum
+ case p.argNum >= len(p.args): // No argument left over to print for the current verb.
+ p.state = missingArg
+ case verb == 'v':
+ // Go syntax
+ p.sharpV = p.sharp
+ p.sharp = false
+ // Struct-field syntax
+ p.plusV = p.plus
+ p.plus = false
+ fallthrough
+ default:
+ p.argNum++
+ }
+ return true
+}
+
+func (p *printer) doPrintf(format string) {
+ p.fmt.fmtFlags.init(format)
+
+ for p.fmt.Scan() {
+ switch p.fmt.state {
+ case text:
+ p.WriteString(p.fmt.Text())
+ case substitution:
+ p.printArg(p.Arg(p.fmt.argNum), p.fmt.verb)
+ case badWidth:
+ p.WriteString(badWidthString)
+ p.printArg(p.Arg(p.fmt.argNum), p.fmt.verb)
+ case badPrec:
+ p.WriteString(badPrecString)
+ p.printArg(p.Arg(p.fmt.argNum), p.fmt.verb)
+ case noVerb:
p.WriteString(noVerbString)
- break
- }
-
- verb, w := utf8.DecodeRuneInString(format[i:])
- i += w
-
- switch {
- case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
- p.WriteByte('%')
- case !p.goodArgNum:
- p.badArgNum(verb)
- case p.argNum >= len(p.args): // No argument left over to print for the current verb.
- p.missingArg(verb)
- case verb == 'v':
- // Go syntax
- p.fmt.sharpV = p.fmt.sharp
- p.fmt.sharp = false
- // Struct-field syntax
- p.fmt.plusV = p.fmt.plus
- p.fmt.plus = false
- fallthrough
+ case badArgNum:
+ p.badArgNum(p.fmt.verb)
+ case missingArg:
+ p.missingArg(p.fmt.verb)
default:
- p.printArg(p.args[p.argNum], verb)
- p.argNum++
+ panic("unreachable")
}
}
@@ -1149,10 +1175,10 @@
// argument. Note that this behavior is necessarily different from fmt:
// different variants of messages may opt to drop some or all of the
// arguments.
- if !p.reordered && p.argNum < len(p.args) && p.argNum != 0 {
+ if !p.fmt.reordered && p.fmt.argNum < len(p.fmt.args) && p.fmt.argNum != 0 {
p.fmt.clearflags()
p.WriteString(extraString)
- for i, arg := range p.args[p.argNum:] {
+ for i, arg := range p.fmt.args[p.fmt.argNum:] {
if i > 0 {
p.WriteString(commaSpaceString)
}
@@ -1160,7 +1186,7 @@
p.WriteString(nilAngleString)
} else {
p.WriteString(reflect.TypeOf(arg).String())
- p.WriteByte('=')
+ p.WriteString("=")
p.printArg(arg, 'v')
}
}