blob: 06e59559a9636f73004d7c60a1d11611b1074456 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"errors"
"unicode/utf8"
)
// This file contains a parser for the CLDR number patterns as described in
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number := decimal | sigDigits
// decimal := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction := '0'* '#'*
// sigDigits := '#'* '@' '@'* '#'*
// exponent := 'E' '+'? '0'* '0'
// padSpec := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
// should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
// interstitial commas.
// TODO: replace special characters in affixes (-, +, ¤) with control codes.
// Pattern holds information for formatting numbers. It is designed to hold
// information from CLDR number patterns.
//
// This pattern is precompiled for all patterns for all languages. Even though
// the number of patterns is not very large, we want to keep this small.
//
// This type is only intended for internal use.
type Pattern struct {
RoundingContext
Affix string // includes prefix and suffix. First byte is prefix length.
Offset uint16 // Offset into Affix for prefix and suffix
NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
PadRune rune
FormatWidth uint16
GroupingSize [2]uint8
Flags PatternFlag
}
// A RoundingContext indicates how a number should be converted to digits.
// It contains all information needed to determine the "visible digits" as
// required by the pluralization rules.
type RoundingContext struct {
// TODO: unify these two fields so that there is a more unambiguous meaning
// of how precision is handled.
MaxSignificantDigits int16 // -1 is unlimited
MaxFractionDigits int16 // -1 is unlimited
Increment uint32
IncrementScale uint8 // May differ from printed scale.
Mode RoundingMode
DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
// Number of digits.
MinIntegerDigits uint8
MaxIntegerDigits uint8
MinFractionDigits uint8
MinSignificantDigits uint8
MinExponentDigits uint8
}
// RoundSignificantDigits returns the number of significant digits an
// implementation of Convert may round to or n < 0 if there is no maximum or
// a maximum is not recommended.
func (r *RoundingContext) RoundSignificantDigits() (n int) {
if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
return int(r.MaxSignificantDigits)
} else if r.isScientific() && r.MaxIntegerDigits == 1 {
if r.MaxSignificantDigits == 0 ||
int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
// Note: don't add DigitShift: it is only used for decimals.
return int(r.MaxFractionDigits) + 1
}
}
return -1
}
// RoundFractionDigits returns the number of fraction digits an implementation
// of Convert may round to or n < 0 if there is no maximum or a maximum is not
// recommended.
func (r *RoundingContext) RoundFractionDigits() (n int) {
if r.MinExponentDigits == 0 &&
r.MaxSignificantDigits == 0 &&
r.MaxFractionDigits >= 0 {
return int(r.MaxFractionDigits) + int(r.DigitShift)
}
return -1
}
// SetScale fixes the RoundingContext to a fixed number of fraction digits.
func (r *RoundingContext) SetScale(scale int) {
r.MinFractionDigits = uint8(scale)
r.MaxFractionDigits = int16(scale)
}
func (r *RoundingContext) SetPrecision(prec int) {
r.MaxSignificantDigits = int16(prec)
}
func (r *RoundingContext) isScientific() bool {
return r.MinExponentDigits > 0
}
func (f *Pattern) needsSep(pos int) bool {
p := pos - 1
size := int(f.GroupingSize[0])
if size == 0 || p == 0 {
return false
}
if p == size {
return true
}
if p -= size; p < 0 {
return false
}
// TODO: make second groupingsize the same as first if 0 so that we can
// avoid this check.
if x := int(f.GroupingSize[1]); x != 0 {
size = x
}
return p%size == 0
}
// A PatternFlag is a bit mask for the flag field of a Pattern.
type PatternFlag uint8
const (
AlwaysSign PatternFlag = 1 << iota
ElideSign // Use space instead of plus sign. AlwaysSign must be true.
AlwaysExpSign
AlwaysDecimalSeparator
ParenthesisForNegative // Common pattern. Saves space.
PadAfterNumber
PadAfterAffix
PadBeforePrefix = 0 // Default
PadAfterPrefix = PadAfterAffix
PadBeforeSuffix = PadAfterNumber
PadAfterSuffix = PadAfterNumber | PadAfterAffix
PadMask = PadAfterNumber | PadAfterAffix
)
type parser struct {
*Pattern
leadingSharps int
pos int
err error
doNotTerminate bool
groupingCount uint
hasGroup bool
buf []byte
}
func (p *parser) setError(err error) {
if p.err == nil {
p.err = err
}
}
func (p *parser) updateGrouping() {
if p.hasGroup &&
0 < p.groupingCount && p.groupingCount < 255 {
p.GroupingSize[1] = p.GroupingSize[0]
p.GroupingSize[0] = uint8(p.groupingCount)
}
p.groupingCount = 0
p.hasGroup = true
}
var (
// TODO: more sensible and localizeable error messages.
errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
errInvalidPadSpecifier = errors.New("format: invalid pad specifier")
errInvalidQuote = errors.New("format: invalid quote")
errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
errDuplicatePercentSign = errors.New("format: duplicate percent sign")
errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
errUnexpectedEnd = errors.New("format: unexpected end of pattern")
)
// ParsePattern extracts formatting information from a CLDR number pattern.
//
// See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
func ParsePattern(s string) (f *Pattern, err error) {
p := parser{Pattern: &Pattern{}}
s = p.parseSubPattern(s)
if s != "" {
// Parse negative sub pattern.
if s[0] != ';' {
p.setError(errors.New("format: error parsing first sub pattern"))
return nil, p.err
}
neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
s = neg.parseSubPattern(s[len(";"):])
p.NegOffset = uint16(len(p.buf))
p.buf = append(p.buf, neg.buf...)
}
if s != "" {
p.setError(errors.New("format: spurious characters at end of pattern"))
}
if p.err != nil {
return nil, p.err
}
if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
// No prefix or suffixes.
p.NegOffset = 0
} else {
p.Affix = affix
}
if p.Increment == 0 {
p.IncrementScale = 0
}
return p.Pattern, nil
}
func (p *parser) parseSubPattern(s string) string {
s = p.parsePad(s, PadBeforePrefix)
s = p.parseAffix(s)
s = p.parsePad(s, PadAfterPrefix)
s = p.parse(p.number, s)
p.updateGrouping()
s = p.parsePad(s, PadBeforeSuffix)
s = p.parseAffix(s)
s = p.parsePad(s, PadAfterSuffix)
return s
}
func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
if len(s) >= 2 && s[0] == '*' {
r, sz := utf8.DecodeRuneInString(s[1:])
if p.PadRune != 0 {
p.err = errMultiplePadSpecifiers
} else {
p.Flags |= f
p.PadRune = r
}
return s[1+sz:]
}
return s
}
func (p *parser) parseAffix(s string) string {
x := len(p.buf)
p.buf = append(p.buf, 0) // placeholder for affix length
s = p.parse(p.affix, s)
n := len(p.buf) - x - 1
if n > 0xFF {
p.setError(errAffixTooLarge)
}
p.buf[x] = uint8(n)
return s
}
// state implements a state transition. It returns the new state. A state
// function may set an error on the parser or may simply return on an incorrect
// token and let the next phase fail.
type state func(r rune) state
// parse repeatedly applies a state function on the given string until a
// termination condition is reached.
func (p *parser) parse(fn state, s string) (tail string) {
for i, r := range s {
p.doNotTerminate = false
if fn = fn(r); fn == nil || p.err != nil {
return s[i:]
}
p.FormatWidth++
}
if p.doNotTerminate {
p.setError(errUnexpectedEnd)
}
return ""
}
func (p *parser) affix(r rune) state {
switch r {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'#', '@', '.', '*', ',', ';':
return nil
case '\'':
p.FormatWidth--
return p.escapeFirst
case '%':
if p.DigitShift != 0 {
p.setError(errDuplicatePercentSign)
}
p.DigitShift = 2
case '\u2030': // ‰ Per mille
if p.DigitShift != 0 {
p.setError(errDuplicatePermilleSign)
}
p.DigitShift = 3
// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
}
p.buf = append(p.buf, string(r)...)
return p.affix
}
func (p *parser) escapeFirst(r rune) state {
switch r {
case '\'':
p.buf = append(p.buf, "\\'"...)
return p.affix
default:
p.buf = append(p.buf, '\'')
p.buf = append(p.buf, string(r)...)
}
return p.escape
}
func (p *parser) escape(r rune) state {
switch r {
case '\'':
p.FormatWidth--
p.buf = append(p.buf, '\'')
return p.affix
default:
p.buf = append(p.buf, string(r)...)
}
return p.escape
}
// number parses a number. The BNF says the integer part should always have
// a '0', but that does not appear to be the case according to the rest of the
// documentation. We will allow having only '#' numbers.
func (p *parser) number(r rune) state {
switch r {
case '#':
p.groupingCount++
p.leadingSharps++
case '@':
p.groupingCount++
p.leadingSharps = 0
p.MaxFractionDigits = -1
return p.sigDigits(r)
case ',':
if p.leadingSharps == 0 { // no leading commas
return nil
}
p.updateGrouping()
case 'E':
p.MaxIntegerDigits = uint8(p.leadingSharps)
return p.exponent
case '.': // allow ".##" etc.
p.updateGrouping()
return p.fraction
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return p.integer(r)
default:
return nil
}
return p.number
}
func (p *parser) integer(r rune) state {
if !('0' <= r && r <= '9') {
var next state
switch r {
case 'E':
if p.leadingSharps > 0 {
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
}
next = p.exponent
case '.':
next = p.fraction
case ',':
next = p.integer
}
p.updateGrouping()
return next
}
p.Increment = p.Increment*10 + uint32(r-'0')
p.groupingCount++
p.MinIntegerDigits++
return p.integer
}
func (p *parser) sigDigits(r rune) state {
switch r {
case '@':
p.groupingCount++
p.MaxSignificantDigits++
p.MinSignificantDigits++
case '#':
return p.sigDigitsFinal(r)
case 'E':
p.updateGrouping()
return p.normalizeSigDigitsWithExponent()
default:
p.updateGrouping()
return nil
}
return p.sigDigits
}
func (p *parser) sigDigitsFinal(r rune) state {
switch r {
case '#':
p.groupingCount++
p.MaxSignificantDigits++
case 'E':
p.updateGrouping()
return p.normalizeSigDigitsWithExponent()
default:
p.updateGrouping()
return nil
}
return p.sigDigitsFinal
}
func (p *parser) normalizeSigDigitsWithExponent() state {
p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
p.MinFractionDigits = p.MinSignificantDigits - 1
p.MaxFractionDigits = p.MaxSignificantDigits - 1
p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
return p.exponent
}
func (p *parser) fraction(r rune) state {
switch r {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
p.Increment = p.Increment*10 + uint32(r-'0')
p.IncrementScale++
p.MinFractionDigits++
p.MaxFractionDigits++
case '#':
p.MaxFractionDigits++
case 'E':
if p.leadingSharps > 0 {
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
}
return p.exponent
default:
return nil
}
return p.fraction
}
func (p *parser) exponent(r rune) state {
switch r {
case '+':
// Set mode and check it wasn't already set.
if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
break
}
p.Flags |= AlwaysExpSign
p.doNotTerminate = true
return p.exponent
case '0':
p.MinExponentDigits++
return p.exponent
}
// termination condition
if p.MinExponentDigits == 0 {
p.setError(errors.New("format: need at least one digit"))
}
return nil
}