Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 1 | // Copyright 2009 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Robert Griesemer | 3d4cd14 | 2015-09-25 22:26:52 -0700 | [diff] [blame] | 5 | // This file implements unsigned multi-precision integers (natural |
| 6 | // numbers). They are the building blocks for the implementation |
| 7 | // of signed integers, rationals, and floating-point numbers. |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 8 | |
Robert Griesemer | 3d4cd14 | 2015-09-25 22:26:52 -0700 | [diff] [blame] | 9 | package big |
Nigel Tao | 6a186d3 | 2011-04-20 09:57:05 +1000 | [diff] [blame] | 10 | |
Aliaksandr Valialkin | 187afde | 2016-04-04 19:28:15 +0300 | [diff] [blame] | 11 | import ( |
| 12 | "math/rand" |
| 13 | "sync" |
| 14 | ) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 15 | |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 16 | // An unsigned integer x of the form |
| 17 | // |
| 18 | // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] |
| 19 | // |
| 20 | // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, |
| 21 | // with the digits x[i] as the slice elements. |
| 22 | // |
| 23 | // A number is normalized if the slice contains no leading 0 digits. |
| 24 | // During arithmetic operations, denormalized values may occur but are |
| 25 | // always normalized before returning the final result. The normalized |
| 26 | // representation of 0 is the empty or nil slice (length = 0). |
Robert Griesemer | 696ced5 | 2011-10-21 14:11:36 -0700 | [diff] [blame] | 27 | // |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 28 | type nat []Word |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 29 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 30 | var ( |
| 31 | natOne = nat{1} |
| 32 | natTwo = nat{2} |
Evan Shaw | 5ac88f4 | 2010-05-21 16:14:55 -0700 | [diff] [blame] | 33 | natTen = nat{10} |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 34 | ) |
| 35 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 36 | func (z nat) clear() { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 37 | for i := range z { |
| 38 | z[i] = 0 |
| 39 | } |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 40 | } |
| 41 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 42 | func (z nat) norm() nat { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 43 | i := len(z) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 44 | for i > 0 && z[i-1] == 0 { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 45 | i-- |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 46 | } |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 47 | return z[0:i] |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 48 | } |
| 49 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 50 | func (z nat) make(n int) nat { |
| 51 | if n <= cap(z) { |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 52 | return z[:n] // reuse z |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 53 | } |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 54 | // Choosing a good value for e has significant performance impact |
| 55 | // because it increases the chance that a value can be reused. |
| 56 | const e = 4 // extra capacity |
| 57 | return make(nat, n, n+e) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 58 | } |
| 59 | |
Robert Griesemer | a688eb6 | 2010-05-19 09:36:50 -0700 | [diff] [blame] | 60 | func (z nat) setWord(x Word) nat { |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 61 | if x == 0 { |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 62 | return z[:0] |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 63 | } |
Robert Griesemer | a688eb6 | 2010-05-19 09:36:50 -0700 | [diff] [blame] | 64 | z = z.make(1) |
| 65 | z[0] = x |
| 66 | return z |
| 67 | } |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 68 | |
Robert Griesemer | a688eb6 | 2010-05-19 09:36:50 -0700 | [diff] [blame] | 69 | func (z nat) setUint64(x uint64) nat { |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 70 | // single-digit values |
Robert Griesemer | a688eb6 | 2010-05-19 09:36:50 -0700 | [diff] [blame] | 71 | if w := Word(x); uint64(w) == x { |
| 72 | return z.setWord(w) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 73 | } |
| 74 | |
| 75 | // compute number of words n required to represent x |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 76 | n := 0 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 77 | for t := x; t > 0; t >>= _W { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 78 | n++ |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 79 | } |
| 80 | |
| 81 | // split x into n words |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 82 | z = z.make(n) |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 83 | for i := range z { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 84 | z[i] = Word(x & _M) |
| 85 | x >>= _W |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 86 | } |
| 87 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 88 | return z |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 89 | } |
| 90 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 91 | func (z nat) set(x nat) nat { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 92 | z = z.make(len(x)) |
Evan Shaw | 2e00bf9 | 2010-07-09 11:24:31 -0700 | [diff] [blame] | 93 | copy(z, x) |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 94 | return z |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 95 | } |
| 96 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 97 | func (z nat) add(x, y nat) nat { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 98 | m := len(x) |
| 99 | n := len(y) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 100 | |
| 101 | switch { |
| 102 | case m < n: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 103 | return z.add(y, x) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 104 | case m == 0: |
| 105 | // n == 0 because m >= n; result is 0 |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 106 | return z[:0] |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 107 | case n == 0: |
| 108 | // result is x |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 109 | return z.set(x) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 110 | } |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 111 | // m > 0 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 112 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 113 | z = z.make(m + 1) |
| 114 | c := addVV(z[0:n], x, y) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 115 | if m > n { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 116 | c = addVW(z[n:m], x[n:], c) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 117 | } |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 118 | z[m] = c |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 119 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 120 | return z.norm() |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 121 | } |
| 122 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 123 | func (z nat) sub(x, y nat) nat { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 124 | m := len(x) |
| 125 | n := len(y) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 126 | |
| 127 | switch { |
| 128 | case m < n: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 129 | panic("underflow") |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 130 | case m == 0: |
| 131 | // n == 0 because m >= n; result is 0 |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 132 | return z[:0] |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 133 | case n == 0: |
| 134 | // result is x |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 135 | return z.set(x) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 136 | } |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 137 | // m > 0 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 138 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 139 | z = z.make(m) |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 140 | c := subVV(z[0:n], x, y) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 141 | if m > n { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 142 | c = subVW(z[n:], x[n:], c) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 143 | } |
| 144 | if c != 0 { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 145 | panic("underflow") |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 146 | } |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 147 | |
Robert Griesemer | 26078c3 | 2010-05-01 15:11:27 -0700 | [diff] [blame] | 148 | return z.norm() |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 149 | } |
| 150 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 151 | func (x nat) cmp(y nat) (r int) { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 152 | m := len(x) |
| 153 | n := len(y) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 154 | if m != n || m == 0 { |
Robert Griesemer | 88742ef | 2009-08-18 10:06:15 -0700 | [diff] [blame] | 155 | switch { |
Russ Cox | c62b326 | 2009-10-06 11:42:55 -0700 | [diff] [blame] | 156 | case m < n: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 157 | r = -1 |
Russ Cox | c62b326 | 2009-10-06 11:42:55 -0700 | [diff] [blame] | 158 | case m > n: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 159 | r = 1 |
Robert Griesemer | 88742ef | 2009-08-18 10:06:15 -0700 | [diff] [blame] | 160 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 161 | return |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 162 | } |
| 163 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 164 | i := m - 1 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 165 | for i > 0 && x[i] == y[i] { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 166 | i-- |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 167 | } |
| 168 | |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 169 | switch { |
Russ Cox | c62b326 | 2009-10-06 11:42:55 -0700 | [diff] [blame] | 170 | case x[i] < y[i]: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 171 | r = -1 |
Russ Cox | c62b326 | 2009-10-06 11:42:55 -0700 | [diff] [blame] | 172 | case x[i] > y[i]: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 173 | r = 1 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 174 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 175 | return |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 176 | } |
| 177 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 178 | func (z nat) mulAddWW(x nat, y, r Word) nat { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 179 | m := len(x) |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 180 | if m == 0 || y == 0 { |
Robert Griesemer | a688eb6 | 2010-05-19 09:36:50 -0700 | [diff] [blame] | 181 | return z.setWord(r) // result is r |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 182 | } |
| 183 | // m > 0 |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 184 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 185 | z = z.make(m + 1) |
| 186 | z[m] = mulAddVWW(z[0:m], x, y, r) |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 187 | |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 188 | return z.norm() |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 189 | } |
| 190 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 191 | // basicMul multiplies x and y and leaves the result in z. |
| 192 | // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. |
| 193 | func basicMul(z, x, y nat) { |
Robert Griesemer | 3f287b5 | 2010-05-06 18:20:01 -0700 | [diff] [blame] | 194 | z[0 : len(x)+len(y)].clear() // initialize z |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 195 | for i, d := range y { |
| 196 | if d != 0 { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 197 | z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
Russ Cox | 4306352 | 2015-12-09 11:49:53 -0500 | [diff] [blame] | 202 | // montgomery computes z mod m = x*y*2**(-n*_W) mod m, |
| 203 | // assuming k = -1/m mod 2**_W. |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 204 | // z is used for storing the result which is returned; |
| 205 | // z must not alias x, y or m. |
Russ Cox | 4306352 | 2015-12-09 11:49:53 -0500 | [diff] [blame] | 206 | // See Gueron, "Efficient Software Implementations of Modular Exponentiation". |
| 207 | // https://eprint.iacr.org/2011/239.pdf |
| 208 | // In the terminology of that paper, this is an "Almost Montgomery Multiplication": |
| 209 | // x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result |
| 210 | // z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 211 | func (z nat) montgomery(x, y, m nat, k Word, n int) nat { |
Russ Cox | 4306352 | 2015-12-09 11:49:53 -0500 | [diff] [blame] | 212 | // This code assumes x, y, m are all the same length, n. |
| 213 | // (required by addMulVVW and the for loop). |
| 214 | // It also assumes that x, y are already reduced mod m, |
| 215 | // or else the result will not be properly reduced. |
| 216 | if len(x) != n || len(y) != n || len(m) != n { |
| 217 | panic("math/big: mismatched montgomery number lengths") |
| 218 | } |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 219 | z = z.make(n) |
| 220 | z.clear() |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 221 | var c Word |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 222 | for i := 0; i < n; i++ { |
| 223 | d := y[i] |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 224 | c2 := addMulVVW(z, x, d) |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 225 | t := z[0] * k |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 226 | c3 := addMulVVW(z, m, t) |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 227 | copy(z, z[1:]) |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 228 | cx := c + c2 |
Russ Cox | 4306352 | 2015-12-09 11:49:53 -0500 | [diff] [blame] | 229 | cy := cx + c3 |
| 230 | z[n-1] = cy |
| 231 | if cx < c2 || cy < c3 { |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 232 | c = 1 |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 233 | } else { |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 234 | c = 0 |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 235 | } |
| 236 | } |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 237 | if c != 0 { |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 238 | subVV(z, z, m) |
| 239 | } |
| 240 | return z |
| 241 | } |
| 242 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 243 | // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. |
| 244 | // Factored out for readability - do not use outside karatsuba. |
| 245 | func karatsubaAdd(z, x nat, n int) { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 246 | if c := addVV(z[0:n], z, x); c != 0 { |
| 247 | addVW(z[n:n+n>>1], z[n:], c) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 248 | } |
| 249 | } |
| 250 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 251 | // Like karatsubaAdd, but does subtract. |
| 252 | func karatsubaSub(z, x nat, n int) { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 253 | if c := subVV(z[0:n], z, x); c != 0 { |
| 254 | subVW(z[n:n+n>>1], z[n:], c) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 255 | } |
| 256 | } |
| 257 | |
// karatsubaThreshold is the operand length (in words) below which
// "grade school" multiplication is used; operands at least this long
// are multiplied with the Karatsuba algorithm.
var karatsubaThreshold = 40 // computed by calibrate.go
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 262 | |
| 263 | // karatsuba multiplies x and y and leaves the result in z. |
| 264 | // Both x and y must have the same length n and n must be a |
| 265 | // power of 2. The result vector z must have len(z) >= 6*n. |
| 266 | // The (non-normalized) result is placed in z[0 : 2*n]. |
| 267 | func karatsuba(z, x, y nat) { |
| 268 | n := len(y) |
| 269 | |
| 270 | // Switch to basic multiplication if numbers are odd or small. |
| 271 | // (n is always even if karatsubaThreshold is even, but be |
| 272 | // conservative) |
| 273 | if n&1 != 0 || n < karatsubaThreshold || n < 2 { |
| 274 | basicMul(z, x, y) |
| 275 | return |
| 276 | } |
| 277 | // n&1 == 0 && n >= karatsubaThreshold && n >= 2 |
| 278 | |
| 279 | // Karatsuba multiplication is based on the observation that |
| 280 | // for two numbers x and y with: |
| 281 | // |
| 282 | // x = x1*b + x0 |
| 283 | // y = y1*b + y0 |
| 284 | // |
| 285 | // the product x*y can be obtained with 3 products z2, z1, z0 |
| 286 | // instead of 4: |
| 287 | // |
| 288 | // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 |
| 289 | // = z2*b*b + z1*b + z0 |
| 290 | // |
| 291 | // with: |
| 292 | // |
| 293 | // xd = x1 - x0 |
| 294 | // yd = y0 - y1 |
| 295 | // |
Rémy Oudompheng | 018c60b | 2012-05-04 19:05:26 +0200 | [diff] [blame] | 296 | // z1 = xd*yd + z2 + z0 |
| 297 | // = (x1-x0)*(y0 - y1) + z2 + z0 |
| 298 | // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 |
| 299 | // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 300 | // = x1*y0 + x0*y1 |
| 301 | |
| 302 | // split x, y into "digits" |
| 303 | n2 := n >> 1 // n2 >= 1 |
| 304 | x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 |
| 305 | y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 |
| 306 | |
| 307 | // z is used for the result and temporary storage: |
| 308 | // |
| 309 | // 6*n 5*n 4*n 3*n 2*n 1*n 0*n |
| 310 | // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] |
| 311 | // |
| 312 | // For each recursive call of karatsuba, an unused slice of |
| 313 | // z is passed in that has (at least) half the length of the |
| 314 | // caller's z. |
| 315 | |
| 316 | // compute z0 and z2 with the result "in place" in z |
| 317 | karatsuba(z, x0, y0) // z0 = x0*y0 |
| 318 | karatsuba(z[n:], x1, y1) // z2 = x1*y1 |
| 319 | |
| 320 | // compute xd (or the negative value if underflow occurs) |
| 321 | s := 1 // sign of product xd*yd |
| 322 | xd := z[2*n : 2*n+n2] |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 323 | if subVV(xd, x1, x0) != 0 { // x1-x0 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 324 | s = -s |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 325 | subVV(xd, x0, x1) // x0-x1 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 326 | } |
| 327 | |
| 328 | // compute yd (or the negative value if underflow occurs) |
| 329 | yd := z[2*n+n2 : 3*n] |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 330 | if subVV(yd, y0, y1) != 0 { // y0-y1 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 331 | s = -s |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 332 | subVV(yd, y1, y0) // y1-y0 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 333 | } |
| 334 | |
| 335 | // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 |
| 336 | // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 |
| 337 | p := z[n*3:] |
| 338 | karatsuba(p, xd, yd) |
| 339 | |
| 340 | // save original z2:z0 |
| 341 | // (ok to use upper half of z since we're done recursing) |
| 342 | r := z[n*4:] |
Rémy Oudompheng | 018c60b | 2012-05-04 19:05:26 +0200 | [diff] [blame] | 343 | copy(r, z[:n*2]) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 344 | |
| 345 | // add up all partial products |
| 346 | // |
| 347 | // 2*n n 0 |
| 348 | // z = [ z2 | z0 ] |
| 349 | // + [ z0 ] |
| 350 | // + [ z2 ] |
| 351 | // + [ p ] |
| 352 | // |
| 353 | karatsubaAdd(z[n2:], r, n) |
| 354 | karatsubaAdd(z[n2:], r[n:], n) |
| 355 | if s > 0 { |
| 356 | karatsubaAdd(z[n2:], p, n) |
| 357 | } else { |
| 358 | karatsubaSub(z[n2:], p, n) |
| 359 | } |
| 360 | } |
| 361 | |
Josh Bleecher Snyder | 2adc4e8 | 2015-02-17 15:44:42 -0800 | [diff] [blame] | 362 | // alias reports whether x and y share the same base array. |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 363 | func alias(x, y nat) bool { |
Robert Griesemer | b9caa4a | 2010-05-03 18:48:05 -0700 | [diff] [blame] | 364 | return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 365 | } |
| 366 | |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 367 | // addAt implements z += x<<(_W*i); z must be long enough. |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 368 | // (we don't use nat.add because we need z to stay the same |
| 369 | // slice, and we don't need to normalize z after each addition) |
| 370 | func addAt(z, x nat, i int) { |
| 371 | if n := len(x); n > 0 { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 372 | if c := addVV(z[i:i+n], z[i:], x); c != 0 { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 373 | j := i + n |
| 374 | if j < len(z) { |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 375 | addVW(z[j:], z[j:], c) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 376 | } |
| 377 | } |
| 378 | } |
| 379 | } |
| 380 | |
// max returns the larger of x and y.
func max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
| 387 | |
Robert Griesemer | 407dbb4 | 2010-04-30 11:54:27 -0700 | [diff] [blame] | 388 | // karatsubaLen computes an approximation to the maximum k <= n such that |
| 389 | // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the |
| 390 | // result is the largest number that can be divided repeatedly by 2 before |
| 391 | // becoming about the value of karatsubaThreshold. |
| 392 | func karatsubaLen(n int) int { |
| 393 | i := uint(0) |
| 394 | for n > karatsubaThreshold { |
| 395 | n >>= 1 |
| 396 | i++ |
| 397 | } |
| 398 | return n << i |
| 399 | } |
| 400 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 401 | func (z nat) mul(x, y nat) nat { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 402 | m := len(x) |
| 403 | n := len(y) |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 404 | |
| 405 | switch { |
| 406 | case m < n: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 407 | return z.mul(y, x) |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 408 | case m == 0 || n == 0: |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 409 | return z[:0] |
Robert Griesemer | 88742ef | 2009-08-18 10:06:15 -0700 | [diff] [blame] | 410 | case n == 1: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 411 | return z.mulAddWW(x, y[0], 0) |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 412 | } |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 413 | // m >= n > 1 |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 414 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 415 | // determine if z can be reused |
Robert Griesemer | b9caa4a | 2010-05-03 18:48:05 -0700 | [diff] [blame] | 416 | if alias(z, x) || alias(z, y) { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 417 | z = nil // z is an alias for x or y - cannot reuse |
Robert Griesemer | 88742ef | 2009-08-18 10:06:15 -0700 | [diff] [blame] | 418 | } |
Robert Griesemer | e587422 | 2009-08-15 11:43:54 -0700 | [diff] [blame] | 419 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 420 | // use basic multiplication if the numbers are small |
David G. Andersen | 917f764 | 2012-07-02 15:30:00 -0700 | [diff] [blame] | 421 | if n < karatsubaThreshold { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 422 | z = z.make(m + n) |
| 423 | basicMul(z, x, y) |
| 424 | return z.norm() |
| 425 | } |
| 426 | // m >= n && n >= karatsubaThreshold && n >= 2 |
| 427 | |
Robert Griesemer | 407dbb4 | 2010-04-30 11:54:27 -0700 | [diff] [blame] | 428 | // determine Karatsuba length k such that |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 429 | // |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 430 | // x = xh*b + x0 (0 <= x0 < b) |
| 431 | // y = yh*b + y0 (0 <= y0 < b) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 432 | // b = 1<<(_W*k) ("base" of digits xi, yi) |
| 433 | // |
Robert Griesemer | 407dbb4 | 2010-04-30 11:54:27 -0700 | [diff] [blame] | 434 | k := karatsubaLen(n) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 435 | // k <= n |
| 436 | |
| 437 | // multiply x0 and y0 via Karatsuba |
| 438 | x0 := x[0:k] // x0 is not normalized |
| 439 | y0 := y[0:k] // y0 is not normalized |
| 440 | z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y |
| 441 | karatsuba(z, x0, y0) |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 442 | z = z[0 : m+n] // z has final length but may be incomplete |
| 443 | z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 444 | |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 445 | // If xh != 0 or yh != 0, add the missing terms to z. For |
Robert Griesemer | 465b9c3 | 2012-10-30 13:38:01 -0700 | [diff] [blame] | 446 | // |
| 447 | // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) |
| 448 | // yh = y1*b (0 <= y1 < b) |
| 449 | // |
| 450 | // the missing terms are |
| 451 | // |
| 452 | // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 |
| 453 | // |
| 454 | // since all the yi for i > 1 are 0 by choice of k: If any of them |
| 455 | // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would |
| 456 | // be a larger valid threshold contradicting the assumption about k. |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 457 | // |
| 458 | if k < n || m != n { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 459 | var t nat |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 460 | |
| 461 | // add x0*y1*b |
| 462 | x0 := x0.norm() |
Robert Griesemer | 98ca655 | 2012-07-12 14:12:50 -0700 | [diff] [blame] | 463 | y1 := y[k:] // y1 is normalized because y is |
| 464 | t = t.mul(x0, y1) // update t so we don't lose t's underlying array |
| 465 | addAt(z, t, k) |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 466 | |
| 467 | // add xi*y0<<i, xi*y1*b<<(i+k) |
| 468 | y0 := y0.norm() |
| 469 | for i := k; i < len(x); i += k { |
| 470 | xi := x[i:] |
| 471 | if len(xi) > k { |
| 472 | xi = xi[:k] |
| 473 | } |
| 474 | xi = xi.norm() |
Robert Griesemer | 98ca655 | 2012-07-12 14:12:50 -0700 | [diff] [blame] | 475 | t = t.mul(xi, y0) |
| 476 | addAt(z, t, i) |
| 477 | t = t.mul(xi, y1) |
| 478 | addAt(z, t, i+k) |
Rémy Oudompheng | ac12131 | 2012-07-12 10:18:24 -0700 | [diff] [blame] | 479 | } |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 480 | } |
| 481 | |
| 482 | return z.norm() |
| 483 | } |
| 484 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 485 | // mulRange computes the product of all the unsigned integers in the |
| 486 | // range [a, b] inclusively. If a > b (empty range), the result is 1. |
| 487 | func (z nat) mulRange(a, b uint64) nat { |
| 488 | switch { |
| 489 | case a == 0: |
| 490 | // cut long ranges short (optimization) |
Robert Griesemer | dbb6232 | 2010-05-15 10:23:41 -0700 | [diff] [blame] | 491 | return z.setUint64(0) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 492 | case a > b: |
Robert Griesemer | dbb6232 | 2010-05-15 10:23:41 -0700 | [diff] [blame] | 493 | return z.setUint64(1) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 494 | case a == b: |
Robert Griesemer | dbb6232 | 2010-05-15 10:23:41 -0700 | [diff] [blame] | 495 | return z.setUint64(a) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 496 | case a+1 == b: |
Robert Griesemer | f5cf0a4 | 2011-11-14 13:35:22 -0800 | [diff] [blame] | 497 | return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 498 | } |
| 499 | m := (a + b) / 2 |
Robert Griesemer | f5cf0a4 | 2011-11-14 13:35:22 -0800 | [diff] [blame] | 500 | return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 501 | } |
| 502 | |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 503 | // q = (x-r)/y, with 0 <= r < y |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 504 | func (z nat) divW(x nat, y Word) (q nat, r Word) { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 505 | m := len(x) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 506 | switch { |
| 507 | case y == 0: |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 508 | panic("division by zero") |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 509 | case y == 1: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 510 | q = z.set(x) // result is x |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 511 | return |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 512 | case m == 0: |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 513 | q = z[:0] // result is 0 |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 514 | return |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 515 | } |
| 516 | // m > 0 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 517 | z = z.make(m) |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 518 | r = divWVW(z, 0, x, y) |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 519 | q = z.norm() |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 520 | return |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 521 | } |
| 522 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 523 | func (z nat) div(z2, u, v nat) (q, r nat) { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 524 | if len(v) == 0 { |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 525 | panic("division by zero") |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 526 | } |
| 527 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 528 | if u.cmp(v) < 0 { |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 529 | q = z[:0] |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 530 | r = z2.set(u) |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 531 | return |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 532 | } |
| 533 | |
| 534 | if len(v) == 1 { |
Robert Griesemer | b7c5e23 | 2012-06-13 09:37:47 -0700 | [diff] [blame] | 535 | var r2 Word |
| 536 | q, r2 = z.divW(u, v[0]) |
| 537 | r = z2.setWord(r2) |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 538 | return |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 539 | } |
| 540 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 541 | q, r = z.divLarge(z2, u, v) |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 542 | return |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 543 | } |
| 544 | |
Russ Cox | 3a90728 | 2016-10-06 22:42:20 -0400 | [diff] [blame^] | 545 | // getNat returns a *nat of len n. The contents may not be zero. |
| 546 | // The pool holds *nat to avoid allocation when converting to interface{}. |
| 547 | func getNat(n int) *nat { |
| 548 | var z *nat |
Aliaksandr Valialkin | 187afde | 2016-04-04 19:28:15 +0300 | [diff] [blame] | 549 | if v := natPool.Get(); v != nil { |
Russ Cox | 3a90728 | 2016-10-06 22:42:20 -0400 | [diff] [blame^] | 550 | z = v.(*nat) |
Aliaksandr Valialkin | 187afde | 2016-04-04 19:28:15 +0300 | [diff] [blame] | 551 | } |
Russ Cox | 3a90728 | 2016-10-06 22:42:20 -0400 | [diff] [blame^] | 552 | if z == nil { |
| 553 | z = new(nat) |
| 554 | } |
| 555 | *z = z.make(n) |
| 556 | return z |
Aliaksandr Valialkin | 187afde | 2016-04-04 19:28:15 +0300 | [diff] [blame] | 557 | } |
| 558 | |
// putNat hands x back to natPool so that a later getNat call can reuse it.
func putNat(x *nat) {
	natPool.Put(x)
}
| 562 | |
// natPool is the pool of *nat values handed out by getNat and
// returned by putNat.
var natPool sync.Pool
| 564 | |
// divLarge computes the quotient q and the remainder r of uIn divided by v:
//
//	q = (uIn-r)/v, with 0 <= r < v
//
// Uses z as storage for q, and u as storage for r if possible.
// See Knuth, Volume 2, section 4.3.1, Algorithm D.
// Preconditions:
//
//	len(v) >= 2
//	len(uIn) >= len(v)
func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
	n := len(v)
	m := len(uIn) - n

	// determine if z can be reused
	// TODO(gri) should find a better solution - this if statement
	//           is very costly (see e.g. time pidigits -s -n 10000)
	if alias(z, uIn) || alias(z, v) {
		z = nil // z is an alias for uIn or v - cannot reuse
	}
	q = z.make(m + 1)

	// qhatv is pooled scratch space of n+1 words; it receives the
	// product qhat*v in step D4 below.
	qhatvp := getNat(n + 1)
	qhatv := *qhatvp
	if alias(u, uIn) || alias(u, v) {
		u = nil // u is an alias for uIn or v - cannot reuse
	}
	// u is the working copy of the dividend; it is one word longer than
	// uIn to hold the word shifted out of the top in step D1.
	u = u.make(len(uIn) + 1)
	u.clear() // TODO(gri) no need to clear if we allocated a new u

	// D1.
	// Shift v and the dividend left by the same amount, determined by the
	// top word of v (nlz presumably counts its leading zero bits - confirm).
	var v1p *nat
	shift := nlz(v[n-1])
	if shift > 0 {
		// do not modify v, it may be used by another goroutine simultaneously
		v1p = getNat(n)
		v1 := *v1p
		shlVU(v1, v, shift)
		v = v1
	}
	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)

	// D2.
	// Produce one quotient word per iteration, most significant first.
	vn1 := v[n-1]
	for j := m; j >= 0; j-- {
		// D3.
		// Estimate the quotient word qhat from the top words of u and v.
		// If u[j+n] == vn1 the estimate stays at the largest word value _M.
		qhat := Word(_M)
		if ujn := u[j+n]; ujn != vn1 {
			var rhat Word
			qhat, rhat = divWW(ujn, u[j+n-1], vn1)

			// x1 | x2 = q̂v_{n-2}
			vn2 := v[n-2]
			x1, x2 := mulWW(qhat, vn2)
			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
			ujn2 := u[j+n-2]
			for greaterThan(x1, x2, rhat, ujn2) {
				qhat--
				prevRhat := rhat
				rhat += vn1
				// v[n-1] >= 0, so this tests for overflow.
				if rhat < prevRhat {
					break
				}
				x1, x2 = mulWW(qhat, vn2)
			}
		}

		// D4.
		// Multiply and subtract: u[j:j+n+1] -= qhat*v.
		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)

		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
		if c != 0 {
			// The subtraction borrowed, so qhat was one too large:
			// add v back once and decrement qhat.
			c := addVV(u[j:j+n], u[j:], v)
			u[j+n] += c
			qhat--
		}

		q[j] = qhat
	}
	// Return the scratch buffers to the pool; v1p is only set when v
	// had to be shifted into a copy.
	if v1p != nil {
		putNat(v1p)
	}
	putNat(qhatvp)

	q = q.norm()
	// Undo the shift from step D1; what is left in u is the remainder.
	shrVU(u, u, shift)
	r = u.norm()

	return q, r
}
| 652 | |
Robert Griesemer | 26078c3 | 2010-05-01 15:11:27 -0700 | [diff] [blame] | 653 | // Length of x in bits. x must be normalized. |
| 654 | func (x nat) bitLen() int { |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 655 | if i := len(x) - 1; i >= 0 { |
Robert Griesemer | 26078c3 | 2010-05-01 15:11:27 -0700 | [diff] [blame] | 656 | return i*_W + bitLen(x[i]) |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 657 | } |
Robert Griesemer | 26078c3 | 2010-05-01 15:11:27 -0700 | [diff] [blame] | 658 | return 0 |
Robert Griesemer | db3bf9c | 2009-08-14 11:53:27 -0700 | [diff] [blame] | 659 | } |
| 660 | |
// deBruijn32 is the multiplier used by trailingZeroBits for 32-bit words;
// the top 5 bits of the product index deBruijn32Lookup.
const deBruijn32 = 0x077CB531

// deBruijn32Lookup is the lookup table indexed by trailingZeroBits
// for 32-bit words.
var deBruijn32Lookup = [...]byte{
	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}

// deBruijn64 is the multiplier used by trailingZeroBits for 64-bit words;
// the top 6 bits of the product index deBruijn64Lookup.
const deBruijn64 = 0x03f79d71b4ca8b09

// deBruijn64Lookup is the lookup table indexed by trailingZeroBits
// for 64-bit words.
var deBruijn64Lookup = [...]byte{
	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
| 676 | |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 677 | // trailingZeroBits returns the number of consecutive least significant zero |
| 678 | // bits of x. |
| 679 | func trailingZeroBits(x Word) uint { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 680 | // x & -x leaves only the right-most bit set in the word. Let k be the |
| 681 | // index of that bit. Since only a single bit is set, the value is two |
Robert Hencke | c8727c8 | 2011-05-18 13:14:56 -0400 | [diff] [blame] | 682 | // to the power of k. Multiplying by a power of two is equivalent to |
Brad Fitzpatrick | 5fea2cc | 2016-03-01 23:21:55 +0000 | [diff] [blame] | 683 | // left shifting, in this case by k bits. The de Bruijn constant is |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 684 | // such that all six bit, consecutive substrings are distinct. |
| 685 | // Therefore, if we have a left shifted version of this constant we can |
| 686 | // find by how many bits it was shifted by looking at which six bit |
| 687 | // substring ended up at the top of the word. |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 688 | // (Knuth, volume 4, section 7.3.1) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 689 | switch _W { |
| 690 | case 32: |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 691 | return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 692 | case 64: |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 693 | return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 694 | default: |
Robert Griesemer | b7c5e23 | 2012-06-13 09:37:47 -0700 | [diff] [blame] | 695 | panic("unknown word size") |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 696 | } |
Adam Langley | 65063bc | 2009-11-05 15:55:41 -0800 | [diff] [blame] | 697 | } |
| 698 | |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 699 | // trailingZeroBits returns the number of consecutive least significant zero |
| 700 | // bits of x. |
| 701 | func (x nat) trailingZeroBits() uint { |
| 702 | if len(x) == 0 { |
| 703 | return 0 |
| 704 | } |
| 705 | var i uint |
| 706 | for x[i] == 0 { |
| 707 | i++ |
| 708 | } |
| 709 | // x[i] != 0 |
| 710 | return i*_W + trailingZeroBits(x[i]) |
| 711 | } |
| 712 | |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 713 | // z = x << s |
| 714 | func (z nat) shl(x nat, s uint) nat { |
| 715 | m := len(x) |
| 716 | if m == 0 { |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 717 | return z[:0] |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 718 | } |
| 719 | // m > 0 |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 720 | |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 721 | n := m + int(s/_W) |
| 722 | z = z.make(n + 1) |
Robert Griesemer | 5bf57c1 | 2011-06-02 12:58:26 -0700 | [diff] [blame] | 723 | z[n] = shlVU(z[n-m:n], x, s%_W) |
Robert Griesemer | 3f287b5 | 2010-05-06 18:20:01 -0700 | [diff] [blame] | 724 | z[0 : n-m].clear() |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 725 | |
| 726 | return z.norm() |
| 727 | } |
| 728 | |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 729 | // z = x >> s |
| 730 | func (z nat) shr(x nat, s uint) nat { |
| 731 | m := len(x) |
| 732 | n := m - int(s/_W) |
| 733 | if n <= 0 { |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 734 | return z[:0] |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 735 | } |
| 736 | // n > 0 |
| 737 | |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 738 | z = z.make(n) |
Robert Griesemer | 5bf57c1 | 2011-06-02 12:58:26 -0700 | [diff] [blame] | 739 | shrVU(z, x[m-n:], s%_W) |
Robert Griesemer | 58e7799 | 2010-04-30 21:25:48 -0700 | [diff] [blame] | 740 | |
| 741 | return z.norm() |
| 742 | } |
| 743 | |
Roger Peppe | 83fd82b | 2011-05-17 13:38:21 -0700 | [diff] [blame] | 744 | func (z nat) setBit(x nat, i uint, b uint) nat { |
| 745 | j := int(i / _W) |
| 746 | m := Word(1) << (i % _W) |
| 747 | n := len(x) |
| 748 | switch b { |
| 749 | case 0: |
| 750 | z = z.make(n) |
| 751 | copy(z, x) |
| 752 | if j >= n { |
| 753 | // no need to grow |
| 754 | return z |
| 755 | } |
| 756 | z[j] &^= m |
| 757 | return z.norm() |
| 758 | case 1: |
| 759 | if j >= n { |
Roger Peppe | ca6de00 | 2011-11-30 09:29:58 -0800 | [diff] [blame] | 760 | z = z.make(j + 1) |
| 761 | z[n:].clear() |
| 762 | } else { |
| 763 | z = z.make(n) |
Roger Peppe | 83fd82b | 2011-05-17 13:38:21 -0700 | [diff] [blame] | 764 | } |
Roger Peppe | 83fd82b | 2011-05-17 13:38:21 -0700 | [diff] [blame] | 765 | copy(z, x) |
| 766 | z[j] |= m |
| 767 | // no need to normalize |
| 768 | return z |
| 769 | } |
| 770 | panic("set bit is not 0 or 1") |
| 771 | } |
| 772 | |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 773 | // bit returns the value of the i'th bit, with lsb == bit 0. |
| 774 | func (x nat) bit(i uint) uint { |
| 775 | j := i / _W |
| 776 | if j >= uint(len(x)) { |
Roger Peppe | 83fd82b | 2011-05-17 13:38:21 -0700 | [diff] [blame] | 777 | return 0 |
| 778 | } |
Robert Griesemer | 4e0618c | 2015-01-15 18:38:25 -0800 | [diff] [blame] | 779 | // 0 <= j < len(x) |
| 780 | return uint(x[j] >> (i % _W) & 1) |
Roger Peppe | 83fd82b | 2011-05-17 13:38:21 -0700 | [diff] [blame] | 781 | } |
| 782 | |
Robert Griesemer | bd275b2 | 2014-12-08 14:36:39 -0800 | [diff] [blame] | 783 | // sticky returns 1 if there's a 1 bit within the |
| 784 | // i least significant bits, otherwise it returns 0. |
| 785 | func (x nat) sticky(i uint) uint { |
| 786 | j := i / _W |
| 787 | if j >= uint(len(x)) { |
| 788 | if len(x) == 0 { |
| 789 | return 0 |
| 790 | } |
| 791 | return 1 |
| 792 | } |
| 793 | // 0 <= j < len(x) |
| 794 | for _, x := range x[:j] { |
| 795 | if x != 0 { |
| 796 | return 1 |
| 797 | } |
| 798 | } |
| 799 | if x[j]<<(_W-i%_W) != 0 { |
| 800 | return 1 |
| 801 | } |
| 802 | return 0 |
| 803 | } |
| 804 | |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 805 | func (z nat) and(x, y nat) nat { |
| 806 | m := len(x) |
| 807 | n := len(y) |
| 808 | if m > n { |
| 809 | m = n |
| 810 | } |
| 811 | // m <= n |
| 812 | |
| 813 | z = z.make(m) |
| 814 | for i := 0; i < m; i++ { |
| 815 | z[i] = x[i] & y[i] |
| 816 | } |
| 817 | |
| 818 | return z.norm() |
| 819 | } |
| 820 | |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 821 | func (z nat) andNot(x, y nat) nat { |
| 822 | m := len(x) |
| 823 | n := len(y) |
| 824 | if n > m { |
| 825 | n = m |
| 826 | } |
| 827 | // m >= n |
| 828 | |
| 829 | z = z.make(m) |
| 830 | for i := 0; i < n; i++ { |
| 831 | z[i] = x[i] &^ y[i] |
| 832 | } |
| 833 | copy(z[n:m], x[n:m]) |
| 834 | |
| 835 | return z.norm() |
| 836 | } |
| 837 | |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 838 | func (z nat) or(x, y nat) nat { |
| 839 | m := len(x) |
| 840 | n := len(y) |
| 841 | s := x |
| 842 | if m < n { |
| 843 | n, m = m, n |
| 844 | s = y |
| 845 | } |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 846 | // m >= n |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 847 | |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 848 | z = z.make(m) |
| 849 | for i := 0; i < n; i++ { |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 850 | z[i] = x[i] | y[i] |
| 851 | } |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 852 | copy(z[n:m], s[n:m]) |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 853 | |
| 854 | return z.norm() |
| 855 | } |
| 856 | |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 857 | func (z nat) xor(x, y nat) nat { |
| 858 | m := len(x) |
| 859 | n := len(y) |
| 860 | s := x |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 861 | if m < n { |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 862 | n, m = m, n |
| 863 | s = y |
| 864 | } |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 865 | // m >= n |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 866 | |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 867 | z = z.make(m) |
| 868 | for i := 0; i < n; i++ { |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 869 | z[i] = x[i] ^ y[i] |
| 870 | } |
Evan Shaw | 28a0971 | 2010-08-09 10:21:54 -0700 | [diff] [blame] | 871 | copy(z[n:m], s[n:m]) |
Evan Shaw | 4d1b157 | 2010-05-03 11:20:52 -0700 | [diff] [blame] | 872 | |
| 873 | return z.norm() |
| 874 | } |
| 875 | |
Josh Bleecher Snyder | 2adc4e8 | 2015-02-17 15:44:42 -0800 | [diff] [blame] | 876 | // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) |
Michael T. Jones | d5c45c5 | 2011-06-07 16:02:34 -0700 | [diff] [blame] | 877 | func greaterThan(x1, x2, y1, y2 Word) bool { |
| 878 | return x1 > y1 || x1 == y1 && x2 > y2 |
| 879 | } |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 880 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 881 | // modW returns x % d. |
| 882 | func (x nat) modW(d Word) (r Word) { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 883 | // TODO(agl): we don't actually need to store the q value. |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 884 | var q nat |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 885 | q = q.make(len(x)) |
Robert Griesemer | 52cc058 | 2010-05-08 13:52:36 -0700 | [diff] [blame] | 886 | return divWVW(q, 0, x, d) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 887 | } |
| 888 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 889 | // random creates a random integer in [0..limit), using the space in z if |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 890 | // possible. n is the bit length of limit. |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 891 | func (z nat) random(rand *rand.Rand, limit nat, n int) nat { |
Robert Griesemer | fc78c5a | 2011-12-22 14:15:41 -0800 | [diff] [blame] | 892 | if alias(z, limit) { |
| 893 | z = nil // z is an alias for limit - cannot reuse |
| 894 | } |
| 895 | z = z.make(len(limit)) |
| 896 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 897 | bitLengthOfMSW := uint(n % _W) |
Russ Cox | cfbee34 | 2010-01-05 16:49:05 -0800 | [diff] [blame] | 898 | if bitLengthOfMSW == 0 { |
| 899 | bitLengthOfMSW = _W |
| 900 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 901 | mask := Word((1 << bitLengthOfMSW) - 1) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 902 | |
| 903 | for { |
Robert Griesemer | b7c5e23 | 2012-06-13 09:37:47 -0700 | [diff] [blame] | 904 | switch _W { |
| 905 | case 32: |
| 906 | for i := range z { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 907 | z[i] = Word(rand.Uint32()) |
Robert Griesemer | b7c5e23 | 2012-06-13 09:37:47 -0700 | [diff] [blame] | 908 | } |
| 909 | case 64: |
| 910 | for i := range z { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 911 | z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 |
| 912 | } |
Robert Griesemer | b7c5e23 | 2012-06-13 09:37:47 -0700 | [diff] [blame] | 913 | default: |
| 914 | panic("unknown word size") |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 915 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 916 | z[len(limit)-1] &= mask |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 917 | if z.cmp(limit) < 0 { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 918 | break |
| 919 | } |
| 920 | } |
| 921 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 922 | return z.norm() |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 923 | } |
| 924 | |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 925 | // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; |
| 926 | // otherwise it sets z to x**y. The result is the value of z. |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 927 | func (z nat) expNN(x, y, m nat) nat { |
Adam Langley | eadebba | 2010-05-24 14:32:55 -0400 | [diff] [blame] | 928 | if alias(z, x) || alias(z, y) { |
Robert Griesemer | 98ca655 | 2012-07-12 14:12:50 -0700 | [diff] [blame] | 929 | // We cannot allow in-place modification of x or y. |
Adam Langley | eadebba | 2010-05-24 14:32:55 -0400 | [diff] [blame] | 930 | z = nil |
| 931 | } |
| 932 | |
Robert Griesemer | 2653386 | 2014-04-21 15:54:51 -0700 | [diff] [blame] | 933 | // x**y mod 1 == 0 |
| 934 | if len(m) == 1 && m[0] == 1 { |
| 935 | return z.setWord(0) |
| 936 | } |
| 937 | // m == 0 || m > 1 |
| 938 | |
| 939 | // x**0 == 1 |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 940 | if len(y) == 0 { |
Robert Griesemer | 2653386 | 2014-04-21 15:54:51 -0700 | [diff] [blame] | 941 | return z.setWord(1) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 942 | } |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 943 | // y > 0 |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 944 | |
ALTree | e21154f | 2015-04-06 21:18:37 +0200 | [diff] [blame] | 945 | // x**1 mod m == x mod m |
| 946 | if len(y) == 1 && y[0] == 1 && len(m) != 0 { |
| 947 | _, z = z.div(z, x, m) |
| 948 | return z |
| 949 | } |
| 950 | // y > 1 |
| 951 | |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 952 | if len(m) != 0 { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 953 | // We likely end up being as long as the modulus. |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 954 | z = z.make(len(m)) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 955 | } |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 956 | z = z.set(x) |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 957 | |
Adam Langley | 73f1117 | 2012-10-17 11:19:26 -0400 | [diff] [blame] | 958 | // If the base is non-trivial and the exponent is large, we use |
| 959 | // 4-bit, windowed exponentiation. This involves precomputing 14 values |
| 960 | // (x^2...x^15) but then reduces the number of multiply-reduces by a |
| 961 | // third. Even for a 32-bit exponent, this reduces the number of |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 962 | // operations. Uses Montgomery method for odd moduli. |
Adam Langley | 73f1117 | 2012-10-17 11:19:26 -0400 | [diff] [blame] | 963 | if len(x) > 1 && len(y) > 1 && len(m) > 0 { |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 964 | if m[0]&1 == 1 { |
| 965 | return z.expNNMontgomery(x, y, m) |
| 966 | } |
Adam Langley | 73f1117 | 2012-10-17 11:19:26 -0400 | [diff] [blame] | 967 | return z.expNNWindowed(x, y, m) |
| 968 | } |
| 969 | |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 970 | v := y[len(y)-1] // v > 0 because y is normalized and y > 0 |
Robert Griesemer | 635cd91 | 2015-05-26 16:42:24 -0700 | [diff] [blame] | 971 | shift := nlz(v) + 1 |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 972 | v <<= shift |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 973 | var q nat |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 974 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 975 | const mask = 1 << (_W - 1) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 976 | |
| 977 | // We walk through the bits of the exponent one by one. Each time we |
| 978 | // see a bit, we square, thus doubling the power. If the bit is a one, |
| 979 | // we also multiply by x, thus adding one to the power. |
| 980 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 981 | w := _W - int(shift) |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 982 | // zz and r are used to avoid allocating in mul and div as |
| 983 | // otherwise the arguments would alias. |
| 984 | var zz, r nat |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 985 | for j := 0; j < w; j++ { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 986 | zz = zz.mul(z, z) |
| 987 | zz, z = z, zz |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 988 | |
| 989 | if v&mask != 0 { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 990 | zz = zz.mul(z, x) |
| 991 | zz, z = z, zz |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 992 | } |
| 993 | |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 994 | if len(m) != 0 { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 995 | zz, r = zz.div(r, z, m) |
| 996 | zz, r, q, z = q, z, zz, r |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 997 | } |
| 998 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 999 | v <<= 1 |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1000 | } |
| 1001 | |
| 1002 | for i := len(y) - 2; i >= 0; i-- { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1003 | v = y[i] |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1004 | |
| 1005 | for j := 0; j < _W; j++ { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 1006 | zz = zz.mul(z, z) |
| 1007 | zz, z = z, zz |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1008 | |
| 1009 | if v&mask != 0 { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 1010 | zz = zz.mul(z, x) |
| 1011 | zz, z = z, zz |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1012 | } |
| 1013 | |
Robert Griesemer | 7565726 | 2012-10-16 13:46:27 -0700 | [diff] [blame] | 1014 | if len(m) != 0 { |
Adam Langley | 9070d57 | 2012-10-01 17:31:35 -0400 | [diff] [blame] | 1015 | zz, r = zz.div(r, z, m) |
| 1016 | zz, r, q, z = q, z, zz, r |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1017 | } |
| 1018 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1019 | v <<= 1 |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1020 | } |
| 1021 | } |
| 1022 | |
Michael T. Jones | 4c113ff | 2011-11-27 11:10:59 -0800 | [diff] [blame] | 1023 | return z.norm() |
| 1024 | } |
| 1025 | |
Adam Langley | 73f1117 | 2012-10-17 11:19:26 -0400 | [diff] [blame] | 1026 | // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. |
| 1027 | func (z nat) expNNWindowed(x, y, m nat) nat { |
| 1028 | // zz and r are used to avoid allocating in mul and div as otherwise |
| 1029 | // the arguments would alias. |
| 1030 | var zz, r nat |
| 1031 | |
| 1032 | const n = 4 |
| 1033 | // powers[i] contains x^i. |
| 1034 | var powers [1 << n]nat |
| 1035 | powers[0] = natOne |
| 1036 | powers[1] = x |
| 1037 | for i := 2; i < 1<<n; i += 2 { |
| 1038 | p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] |
| 1039 | *p = p.mul(*p2, *p2) |
| 1040 | zz, r = zz.div(r, *p, m) |
| 1041 | *p, r = r, *p |
| 1042 | *p1 = p1.mul(*p, x) |
| 1043 | zz, r = zz.div(r, *p1, m) |
| 1044 | *p1, r = r, *p1 |
| 1045 | } |
| 1046 | |
| 1047 | z = z.setWord(1) |
| 1048 | |
| 1049 | for i := len(y) - 1; i >= 0; i-- { |
| 1050 | yi := y[i] |
| 1051 | for j := 0; j < _W; j += n { |
| 1052 | if i != len(y)-1 || j != 0 { |
| 1053 | // Unrolled loop for significant performance |
Brad Fitzpatrick | 5fea2cc | 2016-03-01 23:21:55 +0000 | [diff] [blame] | 1054 | // gain. Use go test -bench=".*" in crypto/rsa |
Adam Langley | 73f1117 | 2012-10-17 11:19:26 -0400 | [diff] [blame] | 1055 | // to check performance before making changes. |
| 1056 | zz = zz.mul(z, z) |
| 1057 | zz, z = z, zz |
| 1058 | zz, r = zz.div(r, z, m) |
| 1059 | z, r = r, z |
| 1060 | |
| 1061 | zz = zz.mul(z, z) |
| 1062 | zz, z = z, zz |
| 1063 | zz, r = zz.div(r, z, m) |
| 1064 | z, r = r, z |
| 1065 | |
| 1066 | zz = zz.mul(z, z) |
| 1067 | zz, z = z, zz |
| 1068 | zz, r = zz.div(r, z, m) |
| 1069 | z, r = r, z |
| 1070 | |
| 1071 | zz = zz.mul(z, z) |
| 1072 | zz, z = z, zz |
| 1073 | zz, r = zz.div(r, z, m) |
| 1074 | z, r = r, z |
| 1075 | } |
| 1076 | |
| 1077 | zz = zz.mul(z, powers[yi>>(_W-n)]) |
| 1078 | zz, z = z, zz |
| 1079 | zz, r = zz.div(r, z, m) |
| 1080 | z, r = r, z |
| 1081 | |
| 1082 | yi <<= n |
| 1083 | } |
| 1084 | } |
| 1085 | |
| 1086 | return z.norm() |
| 1087 | } |
| 1088 | |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1089 | // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window. |
| 1090 | // Uses Montgomery representation. |
| 1091 | func (z nat) expNNMontgomery(x, y, m nat) nat { |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1092 | numWords := len(m) |
| 1093 | |
| 1094 | // We want the lengths of x and m to be equal. |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 1095 | // It is OK if x >= m as long as len(x) == len(m). |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1096 | if len(x) > numWords { |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 1097 | _, x = nat(nil).div(nil, x, m) |
| 1098 | // Note: now len(x) <= numWords, not guaranteed ==. |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1099 | } |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 1100 | if len(x) < numWords { |
| 1101 | rr := make(nat, numWords) |
| 1102 | copy(rr, x) |
| 1103 | x = rr |
| 1104 | } |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1105 | |
| 1106 | // Ideally the precomputations would be performed outside, and reused |
Russ Cox | 0816432 | 2015-12-07 09:52:31 -0500 | [diff] [blame] | 1107 | // k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1108 | // Iteration for Multiplicative Inverses Modulo Prime Powers". |
| 1109 | k0 := 2 - m[0] |
| 1110 | t := m[0] - 1 |
| 1111 | for i := 1; i < _W; i <<= 1 { |
| 1112 | t *= t |
| 1113 | k0 *= (t + 1) |
| 1114 | } |
| 1115 | k0 = -k0 |
| 1116 | |
Russ Cox | 0816432 | 2015-12-07 09:52:31 -0500 | [diff] [blame] | 1117 | // RR = 2**(2*_W*len(m)) mod m |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 1118 | RR := nat(nil).setWord(1) |
| 1119 | zz := nat(nil).shl(RR, uint(2*numWords*_W)) |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1120 | _, RR = RR.div(RR, zz, m) |
| 1121 | if len(RR) < numWords { |
| 1122 | zz = zz.make(numWords) |
| 1123 | copy(zz, RR) |
| 1124 | RR = zz |
| 1125 | } |
| 1126 | // one = 1, with equal length to that of m |
Russ Cox | 6bcec09 | 2015-12-09 11:53:04 -0500 | [diff] [blame] | 1127 | one := make(nat, numWords) |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1128 | one[0] = 1 |
| 1129 | |
| 1130 | const n = 4 |
| 1131 | // powers[i] contains x^i |
| 1132 | var powers [1 << n]nat |
| 1133 | powers[0] = powers[0].montgomery(one, RR, m, k0, numWords) |
| 1134 | powers[1] = powers[1].montgomery(x, RR, m, k0, numWords) |
| 1135 | for i := 2; i < 1<<n; i++ { |
| 1136 | powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords) |
| 1137 | } |
| 1138 | |
| 1139 | // initialize z = 1 (Montgomery 1) |
| 1140 | z = z.make(numWords) |
| 1141 | copy(z, powers[0]) |
| 1142 | |
| 1143 | zz = zz.make(numWords) |
| 1144 | |
| 1145 | // same windowed exponent, but with Montgomery multiplications |
| 1146 | for i := len(y) - 1; i >= 0; i-- { |
| 1147 | yi := y[i] |
| 1148 | for j := 0; j < _W; j += n { |
| 1149 | if i != len(y)-1 || j != 0 { |
| 1150 | zz = zz.montgomery(z, z, m, k0, numWords) |
| 1151 | z = z.montgomery(zz, zz, m, k0, numWords) |
| 1152 | zz = zz.montgomery(z, z, m, k0, numWords) |
| 1153 | z = z.montgomery(zz, zz, m, k0, numWords) |
| 1154 | } |
| 1155 | zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords) |
| 1156 | z, zz = zz, z |
| 1157 | yi <<= n |
| 1158 | } |
| 1159 | } |
| 1160 | // convert to regular number |
| 1161 | zz = zz.montgomery(z, one, m, k0, numWords) |
Russ Cox | 1e066ca | 2016-01-11 09:52:56 -0500 | [diff] [blame] | 1162 | |
| 1163 | // One last reduction, just in case. |
| 1164 | // See golang.org/issue/13907. |
| 1165 | if zz.cmp(m) >= 0 { |
| 1166 | // Common case is m has high bit set; in that case, |
| 1167 | // since zz is the same length as m, there can be just |
| 1168 | // one multiple of m to remove. Just subtract. |
| 1169 | // We think that the subtract should be sufficient in general, |
| 1170 | // so do that unconditionally, but double-check, |
| 1171 | // in case our beliefs are wrong. |
| 1172 | // The div is not expected to be reached. |
| 1173 | zz = zz.sub(zz, m) |
| 1174 | if zz.cmp(m) >= 0 { |
| 1175 | _, zz = nat(nil).div(nil, zz, m) |
| 1176 | } |
| 1177 | } |
| 1178 | |
Vlad Krasnov | 9279684 | 2015-04-22 15:03:59 -0700 | [diff] [blame] | 1179 | return zz.norm() |
| 1180 | } |
| 1181 | |
Adam Langley | 5d5889c | 2015-08-30 09:21:35 -0700 | [diff] [blame] | 1182 | // probablyPrime performs n Miller-Rabin tests to check whether x is prime. |
| 1183 | // If x is prime, it returns true. |
| 1184 | // If x is not prime, it returns false with probability at least 1 - ¼ⁿ. |
| 1185 | // |
| 1186 | // It is not suitable for judging primes that an adversary may have crafted |
| 1187 | // to fool this test. |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1188 | func (n nat) probablyPrime(reps int) bool { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1189 | if len(n) == 0 { |
| 1190 | return false |
| 1191 | } |
| 1192 | |
| 1193 | if len(n) == 1 { |
Adam Langley | 308064f | 2010-03-05 15:55:26 -0500 | [diff] [blame] | 1194 | if n[0] < 2 { |
| 1195 | return false |
| 1196 | } |
| 1197 | |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1198 | if n[0]%2 == 0 { |
| 1199 | return n[0] == 2 |
| 1200 | } |
| 1201 | |
| 1202 | // We have to exclude these cases because we reject all |
| 1203 | // multiples of these numbers below. |
Robert Griesemer | 407dbb4 | 2010-04-30 11:54:27 -0700 | [diff] [blame] | 1204 | switch n[0] { |
| 1205 | case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1206 | return true |
| 1207 | } |
| 1208 | } |
| 1209 | |
Alberto Donizetti | 5de497b | 2014-12-11 19:09:39 +0100 | [diff] [blame] | 1210 | if n[0]&1 == 0 { |
| 1211 | return false // n is even |
| 1212 | } |
| 1213 | |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 1214 | const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} |
| 1215 | const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} |
| 1216 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1217 | var r Word |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1218 | switch _W { |
| 1219 | case 32: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1220 | r = n.modW(primesProduct32) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1221 | case 64: |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1222 | r = n.modW(primesProduct64 & _M) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1223 | default: |
| 1224 | panic("Unknown word size") |
| 1225 | } |
| 1226 | |
| 1227 | if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || |
| 1228 | r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { |
| 1229 | return false |
| 1230 | } |
| 1231 | |
| 1232 | if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || |
| 1233 | r%43 == 0 || r%47 == 0 || r%53 == 0) { |
| 1234 | return false |
| 1235 | } |
| 1236 | |
Robert Griesemer | f5cf0a4 | 2011-11-14 13:35:22 -0800 | [diff] [blame] | 1237 | nm1 := nat(nil).sub(n, natOne) |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 1238 | // determine q, k such that nm1 = q << k |
| 1239 | k := nm1.trailingZeroBits() |
| 1240 | q := nat(nil).shr(nm1, k) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1241 | |
Robert Griesemer | f5cf0a4 | 2011-11-14 13:35:22 -0800 | [diff] [blame] | 1242 | nm3 := nat(nil).sub(nm1, natTwo) |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1243 | rand := rand.New(rand.NewSource(int64(n[0]))) |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1244 | |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1245 | var x, y, quotient nat |
Robert Griesemer | 26078c3 | 2010-05-01 15:11:27 -0700 | [diff] [blame] | 1246 | nm3Len := nm3.bitLen() |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1247 | |
| 1248 | NextRandom: |
| 1249 | for i := 0; i < reps; i++ { |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1250 | x = x.random(rand, nm3, nm3Len) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 1251 | x = x.add(x, natTwo) |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1252 | y = y.expNN(x, q, n) |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 1253 | if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1254 | continue |
| 1255 | } |
Robert Griesemer | 014d036 | 2012-06-08 13:00:49 -0700 | [diff] [blame] | 1256 | for j := uint(1); j < k; j++ { |
Evan Shaw | 841a32d | 2010-04-22 16:57:29 -0700 | [diff] [blame] | 1257 | y = y.mul(y, y) |
| 1258 | quotient, y = quotient.div(y, y, n) |
| 1259 | if y.cmp(nm1) == 0 { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1260 | continue NextRandom |
| 1261 | } |
Robert Griesemer | b218370 | 2010-04-27 19:16:08 -0700 | [diff] [blame] | 1262 | if y.cmp(natOne) == 0 { |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1263 | return false |
| 1264 | } |
| 1265 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1266 | return false |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1267 | } |
| 1268 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 1269 | return true |
Adam Langley | 1941855 | 2009-11-11 13:21:37 -0800 | [diff] [blame] | 1270 | } |
Robert Griesemer | 758d055 | 2011-03-08 17:27:44 -0800 | [diff] [blame] | 1271 | |
Robert Griesemer | 758d055 | 2011-03-08 17:27:44 -0800 | [diff] [blame] | 1272 | // bytes writes the value of z into buf using big-endian encoding. |
| 1273 | // len(buf) must be >= len(z)*_S. The value of z is encoded in the |
| 1274 | // slice buf[i:]. The number i of unused bytes at the beginning of |
| 1275 | // buf is returned as result. |
| 1276 | func (z nat) bytes(buf []byte) (i int) { |
| 1277 | i = len(buf) |
| 1278 | for _, d := range z { |
| 1279 | for j := 0; j < _S; j++ { |
| 1280 | i-- |
| 1281 | buf[i] = byte(d) |
| 1282 | d >>= 8 |
| 1283 | } |
| 1284 | } |
| 1285 | |
| 1286 | for i < len(buf) && buf[i] == 0 { |
| 1287 | i++ |
| 1288 | } |
| 1289 | |
| 1290 | return |
| 1291 | } |
| 1292 | |
Robert Griesemer | 758d055 | 2011-03-08 17:27:44 -0800 | [diff] [blame] | 1293 | // setBytes interprets buf as the bytes of a big-endian unsigned |
| 1294 | // integer, sets z to that value, and returns z. |
| 1295 | func (z nat) setBytes(buf []byte) nat { |
| 1296 | z = z.make((len(buf) + _S - 1) / _S) |
| 1297 | |
| 1298 | k := 0 |
| 1299 | s := uint(0) |
| 1300 | var d Word |
| 1301 | for i := len(buf); i > 0; i-- { |
| 1302 | d |= Word(buf[i-1]) << s |
| 1303 | if s += 8; s == _S*8 { |
| 1304 | z[k] = d |
| 1305 | k++ |
| 1306 | s = 0 |
| 1307 | d = 0 |
| 1308 | } |
| 1309 | } |
| 1310 | if k < len(z) { |
| 1311 | z[k] = d |
| 1312 | } |
| 1313 | |
| 1314 | return z.norm() |
| 1315 | } |