blob: 5120b779b9b48ba3f1eba27d43091f62b4424cd2 [file] [log] [blame]
Adam Langley124e52d2012-03-12 10:59:04 -04001// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Filippo Valsordafe705322019-11-07 16:06:22 -05005// +build amd64,!gccgo,!appengine,!purego
Ian Lance Taylor6779fad2012-11-07 22:50:39 -08006
Adam Langley124e52d2012-03-12 10:59:04 -04007package curve25519
8
9// These functions are implemented in the .s files. The names of the functions
10// in the rest of the file are also taken from the SUPERCOP sources to help
11// people following along.
Shenghou Mabf545632013-02-19 19:15:01 +080012
13//go:noescape
14
Russ Cox750c6a92012-09-21 00:36:01 -040015func cswap(inout *[5]uint64, v uint64)
Shenghou Mabf545632013-02-19 19:15:01 +080016
17//go:noescape
18
Russ Cox750c6a92012-09-21 00:36:01 -040019func ladderstep(inout *[5][5]uint64)
Shenghou Mabf545632013-02-19 19:15:01 +080020
21//go:noescape
22
Adam Langley124e52d2012-03-12 10:59:04 -040023func freeze(inout *[5]uint64)
Shenghou Mabf545632013-02-19 19:15:01 +080024
25//go:noescape
26
Adam Langley124e52d2012-03-12 10:59:04 -040027func mul(dest, a, b *[5]uint64)
Shenghou Mabf545632013-02-19 19:15:01 +080028
29//go:noescape
30
Adam Langley124e52d2012-03-12 10:59:04 -040031func square(out, in *[5]uint64)
32
33// mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
34func mladder(xr, zr *[5]uint64, s *[32]byte) {
35 var work [5][5]uint64
36
37 work[0] = *xr
38 setint(&work[1], 1)
39 setint(&work[2], 0)
40 work[3] = *xr
41 setint(&work[4], 1)
42
43 j := uint(6)
44 var prevbit byte
45
46 for i := 31; i >= 0; i-- {
47 for j < 8 {
48 bit := ((*s)[i] >> j) & 1
49 swap := bit ^ prevbit
50 prevbit = bit
51 cswap(&work[1], uint64(swap))
52 ladderstep(&work)
53 j--
54 }
55 j = 7
56 }
57
58 *xr = work[1]
59 *zr = work[2]
60}
61
62func scalarMult(out, in, base *[32]byte) {
63 var e [32]byte
64 copy(e[:], (*in)[:])
65 e[0] &= 248
66 e[31] &= 127
67 e[31] |= 64
68
69 var t, z [5]uint64
70 unpack(&t, base)
71 mladder(&t, &z, &e)
72 invert(&z, &z)
73 mul(&t, &t, &z)
74 pack(out, &t)
75}
76
77func setint(r *[5]uint64, v uint64) {
78 r[0] = v
79 r[1] = 0
80 r[2] = 0
81 r[3] = 0
82 r[4] = 0
83}
84
85// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
86// order.
87func unpack(r *[5]uint64, x *[32]byte) {
88 r[0] = uint64(x[0]) |
89 uint64(x[1])<<8 |
90 uint64(x[2])<<16 |
91 uint64(x[3])<<24 |
92 uint64(x[4])<<32 |
93 uint64(x[5])<<40 |
94 uint64(x[6]&7)<<48
95
96 r[1] = uint64(x[6])>>3 |
97 uint64(x[7])<<5 |
98 uint64(x[8])<<13 |
99 uint64(x[9])<<21 |
100 uint64(x[10])<<29 |
101 uint64(x[11])<<37 |
102 uint64(x[12]&63)<<45
103
104 r[2] = uint64(x[12])>>6 |
105 uint64(x[13])<<2 |
106 uint64(x[14])<<10 |
107 uint64(x[15])<<18 |
108 uint64(x[16])<<26 |
109 uint64(x[17])<<34 |
110 uint64(x[18])<<42 |
111 uint64(x[19]&1)<<50
112
113 r[3] = uint64(x[19])>>1 |
114 uint64(x[20])<<7 |
115 uint64(x[21])<<15 |
116 uint64(x[22])<<23 |
117 uint64(x[23])<<31 |
118 uint64(x[24])<<39 |
119 uint64(x[25]&15)<<47
120
121 r[4] = uint64(x[25])>>4 |
122 uint64(x[26])<<4 |
123 uint64(x[27])<<12 |
124 uint64(x[28])<<20 |
125 uint64(x[29])<<28 |
126 uint64(x[30])<<36 |
127 uint64(x[31]&127)<<44
128}
129
130// pack sets out = x where out is the usual, little-endian form of the 5,
131// 51-bit limbs in x.
132func pack(out *[32]byte, x *[5]uint64) {
133 t := *x
134 freeze(&t)
135
136 out[0] = byte(t[0])
137 out[1] = byte(t[0] >> 8)
138 out[2] = byte(t[0] >> 16)
139 out[3] = byte(t[0] >> 24)
140 out[4] = byte(t[0] >> 32)
141 out[5] = byte(t[0] >> 40)
142 out[6] = byte(t[0] >> 48)
143
144 out[6] ^= byte(t[1]<<3) & 0xf8
145 out[7] = byte(t[1] >> 5)
146 out[8] = byte(t[1] >> 13)
147 out[9] = byte(t[1] >> 21)
148 out[10] = byte(t[1] >> 29)
149 out[11] = byte(t[1] >> 37)
150 out[12] = byte(t[1] >> 45)
151
152 out[12] ^= byte(t[2]<<6) & 0xc0
153 out[13] = byte(t[2] >> 2)
154 out[14] = byte(t[2] >> 10)
155 out[15] = byte(t[2] >> 18)
156 out[16] = byte(t[2] >> 26)
157 out[17] = byte(t[2] >> 34)
158 out[18] = byte(t[2] >> 42)
159 out[19] = byte(t[2] >> 50)
160
161 out[19] ^= byte(t[3]<<1) & 0xfe
162 out[20] = byte(t[3] >> 7)
163 out[21] = byte(t[3] >> 15)
164 out[22] = byte(t[3] >> 23)
165 out[23] = byte(t[3] >> 31)
166 out[24] = byte(t[3] >> 39)
167 out[25] = byte(t[3] >> 47)
168
169 out[25] ^= byte(t[4]<<4) & 0xf0
170 out[26] = byte(t[4] >> 4)
171 out[27] = byte(t[4] >> 12)
172 out[28] = byte(t[4] >> 20)
173 out[29] = byte(t[4] >> 28)
174 out[30] = byte(t[4] >> 36)
175 out[31] = byte(t[4] >> 44)
176}
177
178// invert calculates r = x^-1 mod p using Fermat's little theorem.
179func invert(r *[5]uint64, x *[5]uint64) {
180 var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
181
182 square(&z2, x) /* 2 */
183 square(&t, &z2) /* 4 */
184 square(&t, &t) /* 8 */
185 mul(&z9, &t, x) /* 9 */
186 mul(&z11, &z9, &z2) /* 11 */
187 square(&t, &z11) /* 22 */
188 mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
189
190 square(&t, &z2_5_0) /* 2^6 - 2^1 */
191 for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
192 square(&t, &t)
193 }
194 mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
195
196 square(&t, &z2_10_0) /* 2^11 - 2^1 */
197 for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
198 square(&t, &t)
199 }
200 mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
201
202 square(&t, &z2_20_0) /* 2^21 - 2^1 */
203 for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
204 square(&t, &t)
205 }
206 mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
207
208 square(&t, &t) /* 2^41 - 2^1 */
209 for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
210 square(&t, &t)
211 }
212 mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
213
214 square(&t, &z2_50_0) /* 2^51 - 2^1 */
215 for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
216 square(&t, &t)
217 }
218 mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
219
220 square(&t, &z2_100_0) /* 2^101 - 2^1 */
221 for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
222 square(&t, &t)
223 }
224 mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
225
226 square(&t, &t) /* 2^201 - 2^1 */
227 for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
228 square(&t, &t)
229 }
230 mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
231
232 square(&t, &t) /* 2^251 - 2^1 */
233 square(&t, &t) /* 2^252 - 2^2 */
234 square(&t, &t) /* 2^253 - 2^3 */
235
236 square(&t, &t) /* 2^254 - 2^4 */
237
238 square(&t, &t) /* 2^255 - 2^5 */
239 mul(r, &t, &z11) /* 2^255 - 21 */
240}