math/big: permit internal nat.scan to accept decimal point
This will simplify parsing of rational and (eventually) floating point numbers.
Also streamlined inner loop. As a result, scan runs faster for all but short
(<= 10 digit) numbers. For short numbers it is < 10% slower (cause is known
and could be addressed in a future CL).
Minor unrelated cleanups. Added additional tests.
benchmark old ns/op new ns/op delta
BenchmarkScanPi 134465 125122 -6.95%
BenchmarkScan10Base2 493 540 +9.53%
BenchmarkScan100Base2 3608 3244 -10.09%
BenchmarkScan1000Base2 35376 32377 -8.48%
BenchmarkScan10000Base2 481504 450028 -6.54%
BenchmarkScan100000Base2 17936774 17648080 -1.61%
BenchmarkScan10Base8 258 280 +8.53%
BenchmarkScan100Base8 1389 1323 -4.75%
BenchmarkScan1000Base8 14221 13036 -8.33%
BenchmarkScan10000Base8 271298 258993 -4.54%
BenchmarkScan100000Base8 15715465 15672580 -0.27%
BenchmarkScan10Base10 249 268 +7.63%
BenchmarkScan100Base10 1324 1220 -7.85%
BenchmarkScan1000Base10 13398 12234 -8.69%
BenchmarkScan10000Base10 259157 249342 -3.79%
BenchmarkScan100000Base10 15670613 15582409 -0.56%
BenchmarkScan10Base16 231 251 +8.66%
BenchmarkScan100Base16 1093 1065 -2.56%
BenchmarkScan1000Base16 12687 12196 -3.87%
BenchmarkScan10000Base16 282349 271443 -3.86%
BenchmarkScan100000Base16 16742669 16552917 -1.13%
Change-Id: I4b9b078792788aef872b307399f00ffd34903127
Reviewed-on: https://go-review.googlesource.com/2960
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index bf00b88..4d65c5f 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -68,7 +68,7 @@
func (z nat) make(n int) nat {
if n <= cap(z) {
- return z[0:n] // reuse z
+ return z[:n] // reuse z
}
// Choosing a good value for e has significant performance impact
// because it increases the chance that a value can be reused.
@@ -78,7 +78,7 @@
func (z nat) setWord(x Word) nat {
if x == 0 {
- return z.make(0)
+ return z[:0]
}
z = z.make(1)
z[0] = x
@@ -122,7 +122,7 @@
return z.add(y, x)
case m == 0:
// n == 0 because m >= n; result is 0
- return z.make(0)
+ return z[:0]
case n == 0:
// result is x
return z.set(x)
@@ -148,7 +148,7 @@
panic("underflow")
case m == 0:
// n == 0 because m >= n; result is 0
- return z.make(0)
+ return z[:0]
case n == 0:
// result is x
return z.set(x)
@@ -384,7 +384,7 @@
case m < n:
return z.mul(y, x)
case m == 0 || n == 0:
- return z.make(0)
+ return z[:0]
case n == 1:
return z.mulAddWW(x, y[0], 0)
}
@@ -488,7 +488,7 @@
q = z.set(x) // result is x
return
case m == 0:
- q = z.make(0) // result is 0
+ q = z[:0] // result is 0
return
}
// m > 0
@@ -504,7 +504,7 @@
}
if u.cmp(v) < 0 {
- q = z.make(0)
+ q = z[:0]
r = z2.set(u)
return
}
@@ -543,7 +543,7 @@
u = nil // u is an alias for uIn or v - cannot reuse
}
u = u.make(len(uIn) + 1)
- u.clear()
+ u.clear() // TODO(gri) no need to clear if we allocated a new u
// D1.
shift := leadingZeros(v[n-1])
@@ -607,51 +607,82 @@
}
// MaxBase is the largest number base accepted for string conversions.
-const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
+const MaxBase = 'z' - 'a' + 10 + 1
-func hexValue(ch rune) Word {
- d := int(MaxBase + 1) // invalid base
- switch {
- case '0' <= ch && ch <= '9':
- d = int(ch - '0')
- case 'a' <= ch && ch <= 'z':
- d = int(ch - 'a' + 10)
- case 'A' <= ch && ch <= 'Z':
- d = int(ch - 'A' + 10)
+// maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M.
+// For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word.
+// In other words, at most n digits in base b fit into a Word.
+// TODO(gri) replace this with a table, generated at build time.
+func maxPow(b Word) (p Word, n int) {
+ p, n = b, 1 // assuming b <= _M
+ for max := _M / b; p <= max; {
+ // p == b**n && p <= max
+ p *= b
+ n++
}
- return Word(d)
+ // p == b**n && p <= _M
+ return
}
-// scan sets z to the natural number corresponding to the longest possible prefix
-// read from r representing an unsigned integer in a given conversion base.
-// It returns z, the actual conversion base used, and an error, if any. In the
-// error case, the value of z is undefined. The syntax follows the syntax of
-// unsigned integer literals in Go.
+// pow returns x**n for n > 0, and 1 otherwise.
+func pow(x Word, n int) (p Word) {
+ p = 1
+ for n > 0 {
+ if n&1 != 0 {
+ p *= x
+ }
+ x *= x
+ n >>= 1
+ }
+ return
+}
+
+// scan scans the number corresponding to the longest possible prefix
+// from r representing an unsigned number in a given conversion base.
+// It returns the corresponding natural number res, the actual base b,
+// a digit count, and an error err, if any.
//
-// The base argument must be 0 or a value from 2 through MaxBase. If the base
-// is 0, the string prefix determines the actual conversion base. A prefix of
-// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
-// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
+// number = [ prefix ] digits | digits "." [ digits ] | "." digits .
+// prefix = "0" [ "x" | "X" | "b" | "B" ] .
+// digits = digit { digit } .
+// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
-func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
- // reject invalid bases
- if base < 0 || base == 1 || MaxBase < base {
- return z, 0, errors.New("invalid number base")
+// The base argument must be a value between 0 and MaxBase (inclusive).
+// For base 0, the number prefix determines the actual base: A prefix of
+// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and
+// a ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base
+// is 10 and no prefix is permitted.
+//
+// Base argument 1 selects actual base 10 but also enables scanning a number
+// with a decimal point.
+//
+// A result digit count > 0 corresponds to the number of (non-prefix) digits
+// parsed. A digit count <= 0 indicates the presence of a decimal point (for
+// base == 1, only), and the number of fractional digits is -count. In this
+// case, the value of the scanned number is res * 10**count.
+//
+func (z nat) scan(r io.RuneScanner, base int) (res nat, b, count int, err error) {
+ // reject illegal bases
+ if base < 0 || base > MaxBase {
+ err = errors.New("illegal number base")
+ return
}
// one char look-ahead
ch, _, err := r.ReadRune()
if err != nil {
- return z, 0, err
+ return
}
- // determine base if necessary
- b := Word(base)
- if base == 0 {
+ // determine actual base
+ switch base {
+ case 0:
+ // actual base is 10 unless there's a base prefix
b = 10
if ch == '0' {
switch ch, _, err = r.ReadRune(); err {
case nil:
+ // possibly one of 0x, 0X, 0b, 0B
b = 8
switch ch {
case 'x', 'X':
@@ -661,62 +692,120 @@
}
if b == 2 || b == 16 {
if ch, _, err = r.ReadRune(); err != nil {
- return z, 0, err
+ // io.EOF is also an error in this case
+ return
}
}
case io.EOF:
- return z.make(0), 10, nil
+ // input is "0"
+ res = z[:0]
+ count = 1
+ err = nil
+ return
default:
- return z, 10, err
+ // read error
+ return
}
}
+ case 1:
+ // actual base is 10 and decimal point is permitted
+ b = 10
+ default:
+ b = base
}
// convert string
- // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
- // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
- z = z.make(0)
- bb := Word(1)
- dd := Word(0)
- for max := _M / b; ; {
- d := hexValue(ch)
- if d >= b {
+ // Algorithm: Collect digits in groups of at most n digits in di
+ // and then use mulAddWW for every such group to add them to the
+ // result.
+ z = z[:0]
+ b1 := Word(b)
+ bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
+ di := Word(0) // 0 <= di < b1**i < bn
+ i := 0 // 0 <= i < n
+ dp := -1 // position of decimal point
+ for {
+ if base == 1 && ch == '.' {
+ base = 10 // no 2nd decimal point permitted
+ dp = count
+ // advance
+ if ch, _, err = r.ReadRune(); err != nil {
+ if err == io.EOF {
+ err = nil
+ break
+ }
+ return
+ }
+ }
+
+ // convert rune into digit value d1
+ var d1 Word
+ switch {
+ case '0' <= ch && ch <= '9':
+ d1 = Word(ch - '0')
+ case 'a' <= ch && ch <= 'z':
+ d1 = Word(ch - 'a' + 10)
+ case 'A' <= ch && ch <= 'Z':
+ d1 = Word(ch - 'A' + 10)
+ default:
+ d1 = MaxBase + 1
+ }
+ if d1 >= b1 {
r.UnreadRune() // ch does not belong to number anymore
break
}
+ count++
- if bb <= max {
- bb *= b
- dd = dd*b + d
- } else {
- // bb * b would overflow
- z = z.mulAddWW(z, bb, dd)
- bb = b
- dd = d
+ // collect d1 in di
+ di = di*b1 + d1
+ i++
+
+ // if di is "full", add it to the result
+ if i == n {
+ z = z.mulAddWW(z, bn, di)
+ di = 0
+ i = 0
}
+ // advance
if ch, _, err = r.ReadRune(); err != nil {
- if err != io.EOF {
- return z, int(b), err
+ if err == io.EOF {
+ err = nil
+ break
}
- break
+ return
}
}
- switch {
- case bb > 1:
- // there was at least one mantissa digit
- z = z.mulAddWW(z, bb, dd)
- case base == 0 && b == 8:
- // there was only the octal prefix 0 (possibly followed by digits > 7);
- // return base 10, not 8
- return z, 10, nil
- case base != 0 || b != 8:
- // there was neither a mantissa digit nor the octal prefix 0
- return z, int(b), errors.New("syntax error scanning number")
+ if count == 0 {
+ // no digits found
+ switch {
+ case base == 0 && b == 8:
+ // there was only the octal prefix 0 (possibly followed by digits > 7);
+ // count as one digit and return base 10, not 8
+ count = 1
+ b = 10
+ case base != 0 || b != 8:
+ // there was neither a mantissa digit nor the octal prefix 0
+ err = errors.New("syntax error scanning number")
+ }
+ return
+ }
+ // count > 0
+
+ // add remaining digits to result
+ if i > 0 {
+ z = z.mulAddWW(z, pow(b1, i), di)
+ }
+ res = z.norm()
+
+ // adjust for fraction, if any
+ if dp >= 0 {
+ // 0 <= dp <= count > 0
+ count = dp - count
}
- return z.norm(), int(b), nil
+ return
}
// Character sets for string conversion.
@@ -799,14 +888,7 @@
}
} else {
- // determine "big base"; i.e., the largest possible value bb
- // that is a power of base b and still fits into a Word
- // (as in 10^19 for 19 decimal digits in a 64bit Word)
- bb := b // big base is b**ndigits
- ndigits := 1 // number of base b digits
- for max := Word(_M / b); bb <= max; bb *= b {
- ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
- }
+ bb, ndigits := maxPow(Word(b))
// construct table of successive squares of bb*leafSize to use in subdivisions
// result (table != nil) <=> (len(x) > leafSize > 0)
@@ -1047,7 +1129,7 @@
func (z nat) shl(x nat, s uint) nat {
m := len(x)
if m == 0 {
- return z.make(0)
+ return z[:0]
}
// m > 0
@@ -1064,7 +1146,7 @@
m := len(x)
n := m - int(s/_W)
if n <= 0 {
- return z.make(0)
+ return z[:0]
}
// n > 0
@@ -1103,12 +1185,14 @@
panic("set bit is not 0 or 1")
}
-func (z nat) bit(i uint) uint {
- j := int(i / _W)
- if j >= len(z) {
+// bit returns the value of the i'th bit, with lsb == bit 0.
+func (x nat) bit(i uint) uint {
+ j := i / _W
+ if j >= uint(len(x)) {
return 0
}
- return uint(z[j] >> (i % _W) & 1)
+ // 0 <= j < len(x)
+ return uint(x[j] >> (i % _W) & 1)
}
func (z nat) and(x, y nat) nat {