| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package fmtstr defines a parser for format strings as used by [fmt.Printf]. |
| package fmtstr |
| |
| import ( |
| "fmt" |
| "strconv" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
| // Operation holds the parsed representation of a printf operation such as "%3.*[4]d". |
| // It is constructed by [Parse]. |
| type Operation struct { |
| Text string // full text of the operation, e.g. "%[2]*.3d" |
| Verb Verb // verb specifier, guaranteed to exist, e.g., 'd' in '%[1]d' |
| Range Range // the range of Text within the overall format string |
| Flags string // formatting flags, e.g. "-0" |
| Width Size // width specifier, e.g., '3' in '%3d' |
| Prec Size // precision specifier, e.g., '.4' in '%.4f' |
| } |
| |
| // Size describes an optional width or precision in a format operation. |
| // It may represent no value, a literal number, an asterisk, or an indexed asterisk. |
| type Size struct { |
| // At most one of these two fields is non-negative. |
| Fixed int // e.g. 4 from "%4d", otherwise -1 |
| Dynamic int // index of argument providing dynamic size (e.g. %*d or %[3]*d), otherwise -1 |
| |
| Index int // If the width or precision uses an indexed argument (e.g. 2 in %[2]*d), this is the index, otherwise -1 |
| Range Range // position of the size specifier within the operation |
| } |
| |
| // Verb represents the verb character of a format operation (e.g., 'd', 's', 'f'). |
| // It also includes positional information and any explicit argument indexing. |
| type Verb struct { |
| Verb rune |
| Range Range // positional range of the verb in the format string |
| Index int // index of an indexed argument, (e.g. 2 in %[2]d), otherwise -1 |
| ArgIndex int // argument index (0-based) associated with this verb, relative to CallExpr |
| } |
| |
// Range is a pair of byte offsets into a format string.
type Range struct {
	Start int // offset of the first byte
	End   int // offset at which the span ends
}
| |
| // Parse takes a format string and its index in the printf-like call, |
| // parses out all format operations, returns a slice of parsed |
| // [Operation] which describes flags, width, precision, verb, and argument indexing, |
| // or an error if parsing fails. |
| // |
| // All error messages are in predicate form ("call has a problem") |
| // so that they may be affixed into a subject ("log.Printf "). |
| // |
| // The flags will only be a subset of ['#', '0', '+', '-', ' ']. |
| // It does not perform any validation of verbs, nor the |
| // existence of corresponding arguments (obviously it can't). The provided format string may differ |
| // from the one in CallExpr, such as a concatenated string or a string |
| // referred to by the argument in the CallExpr. |
| func Parse(format string, idx int) ([]*Operation, error) { |
| if !strings.Contains(format, "%") { |
| return nil, fmt.Errorf("call has arguments but no formatting directives") |
| } |
| |
| firstArg := idx + 1 // Arguments are immediately after format string. |
| argNum := firstArg |
| var operations []*Operation |
| for i, w := 0, 0; i < len(format); i += w { |
| w = 1 |
| if format[i] != '%' { |
| continue |
| } |
| state, err := parseOperation(format[i:], firstArg, argNum) |
| if err != nil { |
| return nil, err |
| } |
| |
| state.operation.addOffset(i) |
| operations = append(operations, state.operation) |
| |
| w = len(state.operation.Text) |
| // Do not waste an argument for '%'. |
| if state.operation.Verb.Verb != '%' { |
| argNum = state.argNum + 1 |
| } |
| } |
| return operations, nil |
| } |
| |
| // Internal parsing state to operation. |
| type state struct { |
| operation *Operation |
| firstArg int // index of the first argument after the format string |
| argNum int // which argument we're expecting to format now |
| hasIndex bool // whether the argument is indexed |
| index int // the encountered index |
| indexPos int // the encountered index's offset |
| indexPending bool // whether we have an indexed argument that has not resolved |
| nbytes int // number of bytes of the format string consumed |
| } |
| |
| // parseOperation parses one format operation starting at the given substring `format`, |
| // which should begin with '%'. It returns a fully populated state or an error |
| // if the operation is malformed. The firstArg and argNum parameters help determine how |
| // arguments map to this operation. |
| // |
| // Parse sequence: '%' -> flags -> {[N]* or width} -> .{[N]* or precision} -> [N] -> verb. |
| func parseOperation(format string, firstArg, argNum int) (*state, error) { |
| state := &state{ |
| operation: &Operation{ |
| Text: format, |
| Width: Size{ |
| Fixed: -1, |
| Dynamic: -1, |
| Index: -1, |
| }, |
| Prec: Size{ |
| Fixed: -1, |
| Dynamic: -1, |
| Index: -1, |
| }, |
| }, |
| firstArg: firstArg, |
| argNum: argNum, |
| hasIndex: false, |
| index: 0, |
| indexPos: 0, |
| indexPending: false, |
| nbytes: len("%"), // There's guaranteed to be a percent sign. |
| } |
| // There may be flags. |
| state.parseFlags() |
| // There may be an index. |
| if err := state.parseIndex(); err != nil { |
| return nil, err |
| } |
| // There may be a width. |
| state.parseSize(Width) |
| // There may be a precision. |
| if err := state.parsePrecision(); err != nil { |
| return nil, err |
| } |
| // Now a verb, possibly prefixed by an index (which we may already have). |
| if !state.indexPending { |
| if err := state.parseIndex(); err != nil { |
| return nil, err |
| } |
| } |
| if state.nbytes == len(state.operation.Text) { |
| return nil, fmt.Errorf("format %s is missing verb at end of string", state.operation.Text) |
| } |
| verb, w := utf8.DecodeRuneInString(state.operation.Text[state.nbytes:]) |
| |
| // Ensure there must be a verb. |
| if state.indexPending { |
| state.operation.Verb = Verb{ |
| Verb: verb, |
| Range: Range{ |
| Start: state.indexPos, |
| End: state.nbytes + w, |
| }, |
| Index: state.index, |
| ArgIndex: state.argNum, |
| } |
| } else { |
| state.operation.Verb = Verb{ |
| Verb: verb, |
| Range: Range{ |
| Start: state.nbytes, |
| End: state.nbytes + w, |
| }, |
| Index: -1, |
| ArgIndex: state.argNum, |
| } |
| } |
| |
| state.nbytes += w |
| state.operation.Text = state.operation.Text[:state.nbytes] |
| return state, nil |
| } |
| |
| // addOffset adjusts the recorded positions in Verb, Width, Prec, and the |
| // operation's overall Range to be relative to the position in the full format string. |
| func (s *Operation) addOffset(parsedLen int) { |
| s.Verb.Range.Start += parsedLen |
| s.Verb.Range.End += parsedLen |
| |
| s.Range.Start = parsedLen |
| s.Range.End = s.Verb.Range.End |
| |
| // one of Fixed or Dynamic is non-negative means existence. |
| if s.Prec.Fixed != -1 || s.Prec.Dynamic != -1 { |
| s.Prec.Range.Start += parsedLen |
| s.Prec.Range.End += parsedLen |
| } |
| if s.Width.Fixed != -1 || s.Width.Dynamic != -1 { |
| s.Width.Range.Start += parsedLen |
| s.Width.Range.End += parsedLen |
| } |
| } |
| |
| // parseFlags accepts any printf flags. |
| func (s *state) parseFlags() { |
| s.operation.Flags = prefixOf(s.operation.Text[s.nbytes:], "#0+- ") |
| s.nbytes += len(s.operation.Flags) |
| } |
| |
// prefixOf returns the longest prefix of s that consists solely of runes
// contained in set.
func prefixOf(s, set string) string {
	// Whatever TrimLeft removes from the front is exactly the prefix we want.
	n := len(s) - len(strings.TrimLeft(s, set))
	return s[:n]
}
| |
| // parseIndex parses an argument index of the form "[n]" that can appear |
| // in a printf operation (e.g., "%[2]d"). Returns an error if syntax is |
| // malformed or index is invalid. |
| func (s *state) parseIndex() error { |
| if s.nbytes == len(s.operation.Text) || s.operation.Text[s.nbytes] != '[' { |
| return nil |
| } |
| // Argument index present. |
| s.nbytes++ // skip '[' |
| start := s.nbytes |
| if num, ok := s.scanNum(); ok { |
| // Later consumed/stored by a '*' or verb. |
| s.index = num |
| s.indexPos = start - 1 |
| } |
| |
| ok := true |
| if s.nbytes == len(s.operation.Text) || s.nbytes == start || s.operation.Text[s.nbytes] != ']' { |
| ok = false // syntax error is either missing "]" or invalid index. |
| s.nbytes = strings.Index(s.operation.Text[start:], "]") |
| if s.nbytes < 0 { |
| return fmt.Errorf("format %s is missing closing ]", s.operation.Text) |
| } |
| s.nbytes = s.nbytes + start |
| } |
| arg32, err := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32) |
| if err != nil || !ok || arg32 <= 0 { |
| return fmt.Errorf("format has invalid argument index [%s]", s.operation.Text[start:s.nbytes]) |
| } |
| |
| s.nbytes++ // skip ']' |
| arg := int(arg32) |
| arg += s.firstArg - 1 // We want to zero-index the actual arguments. |
| s.argNum = arg |
| s.hasIndex = true |
| s.indexPending = true |
| return nil |
| } |
| |
| // scanNum advances through a decimal number if present, which represents a [Size] or [Index]. |
| func (s *state) scanNum() (int, bool) { |
| start := s.nbytes |
| for ; s.nbytes < len(s.operation.Text); s.nbytes++ { |
| c := s.operation.Text[s.nbytes] |
| if c < '0' || '9' < c { |
| if start < s.nbytes { |
| num, _ := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32) |
| return int(num), true |
| } else { |
| return 0, false |
| } |
| } |
| } |
| return 0, false |
| } |
| |
// sizeType tells parseSize which of an operation's two size specifiers
// it is parsing.
type sizeType int

const (
	Width     sizeType = 0 // the width specifier
	Precision sizeType = 1 // the precision specifier
)
| |
| // parseSize parses a width or precision specifier. It handles literal numeric |
| // values (e.g., "%3d"), asterisk values (e.g., "%*d"), or indexed asterisk values (e.g., "%[2]*d"). |
| func (s *state) parseSize(kind sizeType) { |
| if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '*' { |
| s.nbytes++ |
| if s.indexPending { |
| // Absorb it. |
| s.indexPending = false |
| size := Size{ |
| Fixed: -1, |
| Dynamic: s.argNum, |
| Index: s.index, |
| Range: Range{ |
| Start: s.indexPos, |
| End: s.nbytes, |
| }, |
| } |
| switch kind { |
| case Width: |
| s.operation.Width = size |
| case Precision: |
| // Include the leading '.'. |
| size.Range.Start -= len(".") |
| s.operation.Prec = size |
| default: |
| panic(kind) |
| } |
| } else { |
| // Non-indexed asterisk: "%*d". |
| size := Size{ |
| Dynamic: s.argNum, |
| Index: -1, |
| Fixed: -1, |
| Range: Range{ |
| Start: s.nbytes - 1, |
| End: s.nbytes, |
| }, |
| } |
| switch kind { |
| case Width: |
| s.operation.Width = size |
| case Precision: |
| // For precision, include the '.' in the range. |
| size.Range.Start -= 1 |
| s.operation.Prec = size |
| default: |
| panic(kind) |
| } |
| } |
| s.argNum++ |
| } else { // Literal number, e.g. "%10d" |
| start := s.nbytes |
| if num, ok := s.scanNum(); ok { |
| size := Size{ |
| Fixed: num, |
| Index: -1, |
| Dynamic: -1, |
| Range: Range{ |
| Start: start, |
| End: s.nbytes, |
| }, |
| } |
| switch kind { |
| case Width: |
| s.operation.Width = size |
| case Precision: |
| // Include the leading '.'. |
| size.Range.Start -= 1 |
| s.operation.Prec = size |
| default: |
| panic(kind) |
| } |
| } |
| } |
| } |
| |
| // parsePrecision checks if there's a precision specified after a '.' character. |
| // If found, it may also parse an index or an asterisk. Returns an error if any index |
| // parsing fails. |
| func (s *state) parsePrecision() error { |
| // If there's a period, there may be a precision. |
| if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '.' { |
| s.nbytes++ |
| if err := s.parseIndex(); err != nil { |
| return err |
| } |
| s.parseSize(Precision) |
| } |
| return nil |
| } |