Adam Langley | 124e52d | 2012-03-12 10:59:04 -0400 | [diff] [blame] | 1 | // Copyright 2012 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Filippo Valsorda | fe70532 | 2019-11-07 16:06:22 -0500 | [diff] [blame] | 5 | // +build amd64,!gccgo,!appengine,!purego |
Ian Lance Taylor | 6779fad | 2012-11-07 22:50:39 -0800 | [diff] [blame] | 6 | |
Adam Langley | 124e52d | 2012-03-12 10:59:04 -0400 | [diff] [blame] | 7 | package curve25519 |
| 8 | |
| 9 | // These functions are implemented in the .s files. The names of the functions |
| 10 | // in the rest of the file are also taken from the SUPERCOP sources to help |
| 11 | // people following along. |
Shenghou Ma | bf54563 | 2013-02-19 19:15:01 +0800 | [diff] [blame] | 12 | |
| 13 | //go:noescape |
| 14 | |
Russ Cox | 750c6a9 | 2012-09-21 00:36:01 -0400 | [diff] [blame] | 15 | func cswap(inout *[5]uint64, v uint64) |
Shenghou Ma | bf54563 | 2013-02-19 19:15:01 +0800 | [diff] [blame] | 16 | |
| 17 | //go:noescape |
| 18 | |
Russ Cox | 750c6a9 | 2012-09-21 00:36:01 -0400 | [diff] [blame] | 19 | func ladderstep(inout *[5][5]uint64) |
Shenghou Ma | bf54563 | 2013-02-19 19:15:01 +0800 | [diff] [blame] | 20 | |
| 21 | //go:noescape |
| 22 | |
Adam Langley | 124e52d | 2012-03-12 10:59:04 -0400 | [diff] [blame] | 23 | func freeze(inout *[5]uint64) |
Shenghou Ma | bf54563 | 2013-02-19 19:15:01 +0800 | [diff] [blame] | 24 | |
| 25 | //go:noescape |
| 26 | |
Adam Langley | 124e52d | 2012-03-12 10:59:04 -0400 | [diff] [blame] | 27 | func mul(dest, a, b *[5]uint64) |
Shenghou Ma | bf54563 | 2013-02-19 19:15:01 +0800 | [diff] [blame] | 28 | |
| 29 | //go:noescape |
| 30 | |
Adam Langley | 124e52d | 2012-03-12 10:59:04 -0400 | [diff] [blame] | 31 | func square(out, in *[5]uint64) |
| 32 | |
| 33 | // mladder uses a Montgomery ladder to calculate (xr/zr) *= s. |
| 34 | func mladder(xr, zr *[5]uint64, s *[32]byte) { |
| 35 | var work [5][5]uint64 |
| 36 | |
| 37 | work[0] = *xr |
| 38 | setint(&work[1], 1) |
| 39 | setint(&work[2], 0) |
| 40 | work[3] = *xr |
| 41 | setint(&work[4], 1) |
| 42 | |
| 43 | j := uint(6) |
| 44 | var prevbit byte |
| 45 | |
| 46 | for i := 31; i >= 0; i-- { |
| 47 | for j < 8 { |
| 48 | bit := ((*s)[i] >> j) & 1 |
| 49 | swap := bit ^ prevbit |
| 50 | prevbit = bit |
| 51 | cswap(&work[1], uint64(swap)) |
| 52 | ladderstep(&work) |
| 53 | j-- |
| 54 | } |
| 55 | j = 7 |
| 56 | } |
| 57 | |
| 58 | *xr = work[1] |
| 59 | *zr = work[2] |
| 60 | } |
| 61 | |
| 62 | func scalarMult(out, in, base *[32]byte) { |
| 63 | var e [32]byte |
| 64 | copy(e[:], (*in)[:]) |
| 65 | e[0] &= 248 |
| 66 | e[31] &= 127 |
| 67 | e[31] |= 64 |
| 68 | |
| 69 | var t, z [5]uint64 |
| 70 | unpack(&t, base) |
| 71 | mladder(&t, &z, &e) |
| 72 | invert(&z, &z) |
| 73 | mul(&t, &t, &z) |
| 74 | pack(out, &t) |
| 75 | } |
| 76 | |
| 77 | func setint(r *[5]uint64, v uint64) { |
| 78 | r[0] = v |
| 79 | r[1] = 0 |
| 80 | r[2] = 0 |
| 81 | r[3] = 0 |
| 82 | r[4] = 0 |
| 83 | } |
| 84 | |
| 85 | // unpack sets r = x where r consists of 5, 51-bit limbs in little-endian |
| 86 | // order. |
| 87 | func unpack(r *[5]uint64, x *[32]byte) { |
| 88 | r[0] = uint64(x[0]) | |
| 89 | uint64(x[1])<<8 | |
| 90 | uint64(x[2])<<16 | |
| 91 | uint64(x[3])<<24 | |
| 92 | uint64(x[4])<<32 | |
| 93 | uint64(x[5])<<40 | |
| 94 | uint64(x[6]&7)<<48 |
| 95 | |
| 96 | r[1] = uint64(x[6])>>3 | |
| 97 | uint64(x[7])<<5 | |
| 98 | uint64(x[8])<<13 | |
| 99 | uint64(x[9])<<21 | |
| 100 | uint64(x[10])<<29 | |
| 101 | uint64(x[11])<<37 | |
| 102 | uint64(x[12]&63)<<45 |
| 103 | |
| 104 | r[2] = uint64(x[12])>>6 | |
| 105 | uint64(x[13])<<2 | |
| 106 | uint64(x[14])<<10 | |
| 107 | uint64(x[15])<<18 | |
| 108 | uint64(x[16])<<26 | |
| 109 | uint64(x[17])<<34 | |
| 110 | uint64(x[18])<<42 | |
| 111 | uint64(x[19]&1)<<50 |
| 112 | |
| 113 | r[3] = uint64(x[19])>>1 | |
| 114 | uint64(x[20])<<7 | |
| 115 | uint64(x[21])<<15 | |
| 116 | uint64(x[22])<<23 | |
| 117 | uint64(x[23])<<31 | |
| 118 | uint64(x[24])<<39 | |
| 119 | uint64(x[25]&15)<<47 |
| 120 | |
| 121 | r[4] = uint64(x[25])>>4 | |
| 122 | uint64(x[26])<<4 | |
| 123 | uint64(x[27])<<12 | |
| 124 | uint64(x[28])<<20 | |
| 125 | uint64(x[29])<<28 | |
| 126 | uint64(x[30])<<36 | |
| 127 | uint64(x[31]&127)<<44 |
| 128 | } |
| 129 | |
| 130 | // pack sets out = x where out is the usual, little-endian form of the 5, |
| 131 | // 51-bit limbs in x. |
| 132 | func pack(out *[32]byte, x *[5]uint64) { |
| 133 | t := *x |
| 134 | freeze(&t) |
| 135 | |
| 136 | out[0] = byte(t[0]) |
| 137 | out[1] = byte(t[0] >> 8) |
| 138 | out[2] = byte(t[0] >> 16) |
| 139 | out[3] = byte(t[0] >> 24) |
| 140 | out[4] = byte(t[0] >> 32) |
| 141 | out[5] = byte(t[0] >> 40) |
| 142 | out[6] = byte(t[0] >> 48) |
| 143 | |
| 144 | out[6] ^= byte(t[1]<<3) & 0xf8 |
| 145 | out[7] = byte(t[1] >> 5) |
| 146 | out[8] = byte(t[1] >> 13) |
| 147 | out[9] = byte(t[1] >> 21) |
| 148 | out[10] = byte(t[1] >> 29) |
| 149 | out[11] = byte(t[1] >> 37) |
| 150 | out[12] = byte(t[1] >> 45) |
| 151 | |
| 152 | out[12] ^= byte(t[2]<<6) & 0xc0 |
| 153 | out[13] = byte(t[2] >> 2) |
| 154 | out[14] = byte(t[2] >> 10) |
| 155 | out[15] = byte(t[2] >> 18) |
| 156 | out[16] = byte(t[2] >> 26) |
| 157 | out[17] = byte(t[2] >> 34) |
| 158 | out[18] = byte(t[2] >> 42) |
| 159 | out[19] = byte(t[2] >> 50) |
| 160 | |
| 161 | out[19] ^= byte(t[3]<<1) & 0xfe |
| 162 | out[20] = byte(t[3] >> 7) |
| 163 | out[21] = byte(t[3] >> 15) |
| 164 | out[22] = byte(t[3] >> 23) |
| 165 | out[23] = byte(t[3] >> 31) |
| 166 | out[24] = byte(t[3] >> 39) |
| 167 | out[25] = byte(t[3] >> 47) |
| 168 | |
| 169 | out[25] ^= byte(t[4]<<4) & 0xf0 |
| 170 | out[26] = byte(t[4] >> 4) |
| 171 | out[27] = byte(t[4] >> 12) |
| 172 | out[28] = byte(t[4] >> 20) |
| 173 | out[29] = byte(t[4] >> 28) |
| 174 | out[30] = byte(t[4] >> 36) |
| 175 | out[31] = byte(t[4] >> 44) |
| 176 | } |
| 177 | |
| 178 | // invert calculates r = x^-1 mod p using Fermat's little theorem. |
| 179 | func invert(r *[5]uint64, x *[5]uint64) { |
| 180 | var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 |
| 181 | |
| 182 | square(&z2, x) /* 2 */ |
| 183 | square(&t, &z2) /* 4 */ |
| 184 | square(&t, &t) /* 8 */ |
| 185 | mul(&z9, &t, x) /* 9 */ |
| 186 | mul(&z11, &z9, &z2) /* 11 */ |
| 187 | square(&t, &z11) /* 22 */ |
| 188 | mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ |
| 189 | |
| 190 | square(&t, &z2_5_0) /* 2^6 - 2^1 */ |
| 191 | for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ |
| 192 | square(&t, &t) |
| 193 | } |
| 194 | mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ |
| 195 | |
| 196 | square(&t, &z2_10_0) /* 2^11 - 2^1 */ |
| 197 | for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ |
| 198 | square(&t, &t) |
| 199 | } |
| 200 | mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ |
| 201 | |
| 202 | square(&t, &z2_20_0) /* 2^21 - 2^1 */ |
| 203 | for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ |
| 204 | square(&t, &t) |
| 205 | } |
| 206 | mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ |
| 207 | |
| 208 | square(&t, &t) /* 2^41 - 2^1 */ |
| 209 | for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ |
| 210 | square(&t, &t) |
| 211 | } |
| 212 | mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ |
| 213 | |
| 214 | square(&t, &z2_50_0) /* 2^51 - 2^1 */ |
| 215 | for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ |
| 216 | square(&t, &t) |
| 217 | } |
| 218 | mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ |
| 219 | |
| 220 | square(&t, &z2_100_0) /* 2^101 - 2^1 */ |
| 221 | for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ |
| 222 | square(&t, &t) |
| 223 | } |
| 224 | mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ |
| 225 | |
| 226 | square(&t, &t) /* 2^201 - 2^1 */ |
| 227 | for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ |
| 228 | square(&t, &t) |
| 229 | } |
| 230 | mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ |
| 231 | |
| 232 | square(&t, &t) /* 2^251 - 2^1 */ |
| 233 | square(&t, &t) /* 2^252 - 2^2 */ |
| 234 | square(&t, &t) /* 2^253 - 2^3 */ |
| 235 | |
| 236 | square(&t, &t) /* 2^254 - 2^4 */ |
| 237 | |
| 238 | square(&t, &t) /* 2^255 - 2^5 */ |
| 239 | mul(r, &t, &z11) /* 2^255 - 21 */ |
| 240 | } |