mirror of
https://github.com/trezor/trezor-firmware.git
synced 2024-12-19 12:58:13 +00:00
Merge pull request #26 from jhoenicke/bignum_improvements
Bignum improvements
This commit is contained in:
commit
a757693fe3
556
bignum.c
556
bignum.c
@ -23,6 +23,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "bignum.h"
|
||||
#include "secp256k1.h"
|
||||
|
||||
@ -42,27 +43,37 @@ inline void write_be(uint8_t *data, uint32_t x)
|
||||
data[3] = x;
|
||||
}
|
||||
|
||||
// convert a raw bigendian 256 bit number to a normalized bignum
|
||||
void bn_read_be(const uint8_t *in_number, bignum256 *out_number)
|
||||
{
|
||||
int i;
|
||||
uint64_t temp = 0;
|
||||
uint32_t temp = 0;
|
||||
for (i = 0; i < 8; i++) {
|
||||
temp += (((uint64_t)read_be(in_number + (7 - i) * 4)) << (2 * i));
|
||||
// invariant: temp = (in_number % 2^(32i)) >> 30i
|
||||
// get next limb = (in_number % 2^(32(i+1))) >> 32i
|
||||
uint32_t limb = read_be(in_number + (7 - i) * 4);
|
||||
// temp = (in_number % 2^(32(i+1))) >> 30i
|
||||
temp |= limb << (2*i);
|
||||
// store 30 bits into val[i]
|
||||
out_number->val[i]= temp & 0x3FFFFFFF;
|
||||
temp >>= 30;
|
||||
// prepare temp for next round
|
||||
temp = limb >> (30 - 2*i);
|
||||
}
|
||||
out_number->val[8] = temp;
|
||||
}
|
||||
|
||||
// convert a normalized bignum to a raw bigendian 256 bit number.
|
||||
// in_number must be normalized and < 2^256.
|
||||
void bn_write_be(const bignum256 *in_number, uint8_t *out_number)
|
||||
{
|
||||
int i, shift = 30 + 16 - 32;
|
||||
uint64_t temp = in_number->val[8];
|
||||
int i;
|
||||
uint32_t temp = in_number->val[8] << 16;
|
||||
for (i = 0; i < 8; i++) {
|
||||
temp <<= 30;
|
||||
temp |= in_number->val[7 - i];
|
||||
write_be(out_number + i * 4, temp >> shift);
|
||||
shift -= 2;
|
||||
// invariant: temp = (in_number >> 30*(8-i)) << (16 + 2i)
|
||||
uint32_t limb = in_number->val[7 - i];
|
||||
temp |= limb >> (14 - 2*i);
|
||||
write_be(out_number + i * 4, temp);
|
||||
temp = limb << (18 + 2*i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,6 +139,26 @@ void bn_rshift(bignum256 *a)
|
||||
a->val[8] >>= 1;
|
||||
}
|
||||
|
||||
// multiply x by 3/2 modulo prime.
|
||||
// assumes x < 2*prime,
|
||||
// guarantees x < 4*prime on exit.
|
||||
void bn_mult_3_2(bignum256 * x, const bignum256 *prime)
|
||||
{
|
||||
int j;
|
||||
// xodd is an all-ones mask when the lowest bit of x is set and zero
// otherwise, so "prime->val[..] & xodd" adds prime only for odd x,
// without branching on the value of x.
uint32_t xodd = -(x->val[0] & 1);
|
||||
// compute x = 3*x/2 mod prime
|
||||
// if x is odd compute (3*x+prime)/2
|
||||
// tmp1 carries the output limb currently being assembled. It starts as the
// lowest limb of the sum, already shifted right by one bit.
// (assuming prime is odd, the sum is even and the dropped bit is zero)
uint32_t tmp1 = (3*x->val[0] + (prime->val[0] & xodd)) >> 1;
|
||||
for (j = 0; j < 8; j++) {
|
||||
// tmp2 is the unshifted sum for the next higher limb.
uint32_t tmp2 = (3*x->val[j+1] + (prime->val[j+1] & xodd));
|
||||
// the lowest bit of limb j+1 moves down into bit 29 of limb j
// when the whole number is shifted right by one.
tmp1 += (tmp2 & 1) << 29;
|
||||
// store the low 30 bits as limb j ...
x->val[j] = tmp1 & 0x3fffffff;
|
||||
// ... and keep whatever exceeds 30 bits as carry into limb j+1.
tmp1 >>= 30;
|
||||
tmp1 += tmp2 >> 1;
|
||||
}
|
||||
// the top limb is stored unmasked, it absorbs the final carry.
x->val[8] = tmp1;
|
||||
}
|
||||
|
||||
// assumes x < 2*prime, result < prime
|
||||
void bn_mod(bignum256 *x, const bignum256 *prime)
|
||||
{
|
||||
@ -233,7 +264,6 @@ void bn_fast_mod(bignum256 *x, const bignum256 *prime)
|
||||
uint64_t temp;
|
||||
|
||||
coef = x->val[8] >> 16;
|
||||
if (!coef) return;
|
||||
// subtract (coef * prime) from x
|
||||
// note that we unrolled the first iteration
|
||||
temp = 0x1000000000000000ull + x->val[0] - prime->val[0] * (uint64_t)coef;
|
||||
@ -283,10 +313,6 @@ void bn_sqrt(bignum256 *x, const bignum256 *prime)
|
||||
|
||||
#if ! USE_INVERSE_FAST
|
||||
|
||||
#if USE_PRECOMPUTED_IV
|
||||
#warning USE_PRECOMPUTED_IV will not be used
|
||||
#endif
|
||||
|
||||
// in field G_prime, small but slow
|
||||
void bn_inverse(bignum256 *x, const bignum256 *prime)
|
||||
{
|
||||
@ -322,285 +348,272 @@ void bn_inverse(bignum256 *x, const bignum256 *prime)
|
||||
|
||||
#else
|
||||
|
||||
// in field G_prime, big but fast
|
||||
// this algorithm is based on the Euklidean algorithm
|
||||
// the result is smaller than 2*prime
|
||||
// in field G_prime, big and complicated but fast
|
||||
// the input must not be 0 mod prime.
|
||||
// the result is smaller than prime
|
||||
void bn_inverse(bignum256 *x, const bignum256 *prime)
|
||||
{
|
||||
int i, j, k, len1, len2, mask;
|
||||
uint8_t buf[32];
|
||||
uint32_t u[8], v[8], s[9], r[10], temp32;
|
||||
uint64_t temp, temp2;
|
||||
// reduce x modulo prime
|
||||
int i, j, k, cmp;
|
||||
struct combo {
|
||||
uint32_t a[9];
|
||||
int len1;
|
||||
} us, vr, *odd, *even;
|
||||
uint32_t pp[8];
|
||||
uint32_t temp32;
|
||||
uint64_t temp;
|
||||
|
||||
// The algorithm is based on Schroeppel et al. "Almost Modular Inverse"
|
||||
// algorithm. We keep four values u,v,r,s in the combo registers
|
||||
// us and vr. us stores u in the first len1 limbs (little endian)
|
||||
// and s in the last 9-len1 limbs (big endian). vr stores v and r.
|
||||
// This is because both u*s and v*r are guaranteed to fit in 8 limbs, so
|
||||
// their components are guaranteed to fit in 9. During the algorithm,
|
||||
// the length of u and v shrinks while r and s grow.
|
||||
// u,v,r,s correspond to F,G,B,C in Schroeppel's algorithm.
|
||||
|
||||
// reduce x modulo prime. This is necessary as it has to fit in 8 limbs.
|
||||
bn_fast_mod(x, prime);
|
||||
bn_mod(x, prime);
|
||||
// convert x and prime it to 8x32 bit limb form
|
||||
bn_write_be(prime, buf);
|
||||
// convert x and prime to 8x32 bit limb form
|
||||
temp32 = prime->val[0];
|
||||
for (i = 0; i < 8; i++) {
|
||||
u[i] = read_be(buf + 28 - i * 4);
|
||||
temp32 |= prime->val[i + 1] << (30-2*i);
|
||||
us.a[i] = pp[i] = temp32;
|
||||
temp32 = prime->val[i + 1] >> (2+2*i);
|
||||
}
|
||||
bn_write_be(x, buf);
|
||||
temp32 = x->val[0];
|
||||
for (i = 0; i < 8; i++) {
|
||||
v[i] = read_be(buf + 28 - i * 4);
|
||||
temp32 |= x->val[i + 1] << (30-2*i);
|
||||
vr.a[i] = temp32;
|
||||
temp32 = x->val[i + 1] >> (2+2*i);
|
||||
}
|
||||
len1 = 8;
|
||||
s[0] = 1;
|
||||
r[0] = 0;
|
||||
len2 = 1;
|
||||
us.len1 = 8;
|
||||
vr.len1 = 8;
|
||||
// set s = 1 and r = 0
|
||||
us.a[8] = 1;
|
||||
vr.a[8] = 0;
|
||||
// set k = 0.
|
||||
k = 0;
|
||||
// u = prime, v = x len1 = numlimbs(u,v)
|
||||
// r = 0 , s = 1 len2 = numlimbs(r,s)
|
||||
|
||||
// only one of the numbers u,v can be even at any time. We
|
||||
// let even point to that number and odd to the other.
|
||||
// Initially the prime u is guaranteed to be odd.
|
||||
odd = &us;
|
||||
even = &vr;
|
||||
|
||||
// u = prime, v = x
|
||||
// r = 0 , s = 1
|
||||
// k = 0
|
||||
for (;;) {
|
||||
// invariants:
|
||||
// r,s,u,v >= 0
|
||||
// let u = limbs us.a[0..us.len1-1] in little endian,
|
||||
// let s = limbs us.a[us.len1..8] in big endian,
|
||||
// let v = limbs vr.a[0..vr.len1-1] in little endian,
|
||||
// let r = limbs vr.a[vr.len1..8] in big endian,
|
||||
// r,s >= 0 ; u,v >= 1
|
||||
// x*-r = u*2^k mod prime
|
||||
// x*s = v*2^k mod prime
|
||||
// u*s + v*r = prime
|
||||
// floor(log2(u)) + floor(log2(v)) + k <= 510
|
||||
// max(u,v) <= 2^k
|
||||
// max(u,v) <= 2^k (*) see comment at end of loop
|
||||
// gcd(u,v) = 1
|
||||
// len1 = numlimbs(u,v)
|
||||
// len2 = numlimbs(r,s)
|
||||
// {odd,even} = {&us, &vr}
|
||||
// odd->a[0] and odd->a[8] are odd
|
||||
// even->a[0] or even->a[8] is even
|
||||
//
|
||||
// first u,v are large and s,r small
|
||||
// later u,v are small and s,r large
|
||||
// first u/v are large and r/s small
|
||||
// later u/v are small and r/s large
|
||||
assert(odd->a[0] & 1);
|
||||
assert(odd->a[8] & 1);
|
||||
|
||||
// if (is_zero(v)) break;
|
||||
for (i = 0; i < len1; i++) {
|
||||
if (v[i]) break;
|
||||
// adjust length of even.
|
||||
while (even->a[even->len1 - 1] == 0) {
|
||||
even->len1--;
|
||||
// if input was 0, return.
|
||||
// This simple check prevents crashing with stack underflow
|
||||
// or worse undesired behaviour for illegal input.
|
||||
if (even->len1 < 0)
|
||||
return;
|
||||
}
|
||||
if (i == len1) break;
|
||||
|
||||
// reduce u while it is even
|
||||
for (;;) {
|
||||
// count up to 30 zero bits of u.
|
||||
for (i = 0; i < 30; i++) {
|
||||
if (u[0] & (1 << i)) break;
|
||||
// reduce even->a while it is even
|
||||
while (even->a[0] == 0) {
|
||||
// shift right first part of even by a limb
|
||||
// and shift left second part of even by a limb.
|
||||
for (i = 0; i < 8; i++) {
|
||||
even->a[i] = even->a[i+1];
|
||||
}
|
||||
// if u was odd break
|
||||
if (i == 0) break;
|
||||
|
||||
// shift u right by i bits.
|
||||
mask = (1 << i) - 1;
|
||||
for (j = 0; j + 1 < len1; j++) {
|
||||
u[j] = (u[j] >> i) | ((u[j + 1] & mask) << (32 - i));
|
||||
}
|
||||
u[j] = (u[j] >> i);
|
||||
|
||||
// shift s left by i bits.
|
||||
mask = (1 << (32 - i)) - 1;
|
||||
s[len2] = s[len2 - 1] >> (32 - i);
|
||||
for (j = len2 - 1; j > 0; j--) {
|
||||
s[j] = (s[j - 1] >> (32 - i)) | ((s[j] & mask) << i);
|
||||
}
|
||||
s[0] = (s[0] & mask) << i;
|
||||
// update len2 if necessary
|
||||
if (s[len2]) {
|
||||
r[len2] = 0;
|
||||
len2++;
|
||||
}
|
||||
// add i bits to k.
|
||||
k += i;
|
||||
even->a[i] = 0;
|
||||
even->len1--;
|
||||
k += 32;
|
||||
}
|
||||
// reduce v while it is even
|
||||
for (;;) {
|
||||
// count up to 30 zero bits of v.
|
||||
for (i = 0; i < 30; i++) {
|
||||
if (v[0] & (1 << i)) break;
|
||||
}
|
||||
// if v was odd break
|
||||
if (i == 0) break;
|
||||
|
||||
// shift v right by i bits.
|
||||
mask = (1 << i) - 1;
|
||||
for (j = 0; j + 1 < len1; j++) {
|
||||
v[j] = (v[j] >> i) | ((v[j + 1] & mask) << (32 - i));
|
||||
}
|
||||
v[j] = (v[j] >> i);
|
||||
mask = (1 << (32 - i)) - 1;
|
||||
// shift r left by i bits.
|
||||
r[len2] = r[len2 - 1] >> (32 - i);
|
||||
for (j = len2 - 1; j > 0; j--) {
|
||||
r[j] = (r[j - 1] >> (32 - i)) | ((r[j] & mask) << i);
|
||||
}
|
||||
r[0] = (r[0] & mask) << i;
|
||||
// update len2 if necessary
|
||||
if (r[len2]) {
|
||||
s[len2] = 0;
|
||||
len2++;
|
||||
}
|
||||
// add i bits to k.
|
||||
k += i;
|
||||
// count up to 32 zero bits of even->a.
|
||||
j = 0;
|
||||
while ((even->a[0] & (1 << j)) == 0) {
|
||||
j++;
|
||||
}
|
||||
|
||||
// invariant is reestablished.
|
||||
i = len1 - 1;
|
||||
while (i > 0 && u[i] == v[i]) i--;
|
||||
if (u[i] > v[i]) {
|
||||
// u > v:
|
||||
// u = (u - v)/2;
|
||||
temp = 0x100000000ull + u[0] - v[0];
|
||||
u[0] = (temp >> 1) & 0x7FFFFFFF;
|
||||
temp >>= 32;
|
||||
for (i = 1; i < len1; i++) {
|
||||
temp += 0xFFFFFFFFull + u[i] - v[i];
|
||||
u[i - 1] += (temp & 1) << 31;
|
||||
u[i] = (temp >> 1) & 0x7FFFFFFF;
|
||||
temp >>= 32;
|
||||
if (j > 0) {
|
||||
// shift first part of even right by j bits.
|
||||
for (i = 0; i + 1 < even->len1; i++) {
|
||||
even->a[i] = (even->a[i] >> j) | (even->a[i + 1] << (32 - j));
|
||||
}
|
||||
temp = temp2 = 0;
|
||||
// r += s;
|
||||
// s += s;
|
||||
for (i = 0; i < len2; i++) {
|
||||
temp += s[i];
|
||||
temp += r[i];
|
||||
temp2 += s[i];
|
||||
temp2 += s[i];
|
||||
r[i] = temp;
|
||||
s[i] = temp2;
|
||||
temp >>= 32;
|
||||
temp2 >>= 32;
|
||||
}
|
||||
// expand if necessary.
|
||||
if (temp != 0 || temp2 != 0) {
|
||||
r[len2] = temp;
|
||||
s[len2] = temp2;
|
||||
len2++;
|
||||
}
|
||||
// note that
|
||||
// u'2^(k+1) = (u - v) 2^k = x -(r + s) = x -r' mod prime
|
||||
// v'2^(k+1) = 2*v 2^k = x (s + s) = x s' mod prime
|
||||
// u's' + v'r' = (u-v)/2(2s) + v(r+s) = us + vr
|
||||
} else {
|
||||
// v >= u:
|
||||
// v = v - u;
|
||||
temp = 0x100000000ull + v[0] - u[0];
|
||||
v[0] = (temp >> 1) & 0x7FFFFFFF;
|
||||
temp >>= 32;
|
||||
for (i = 1; i < len1; i++) {
|
||||
temp += 0xFFFFFFFFull + v[i] - u[i];
|
||||
v[i - 1] += (temp & 1) << 31;
|
||||
v[i] = (temp >> 1) & 0x7FFFFFFF;
|
||||
temp >>= 32;
|
||||
}
|
||||
// s = s + r
|
||||
// r = r + r
|
||||
temp = temp2 = 0;
|
||||
for (i = 0; i < len2; i++) {
|
||||
temp += s[i];
|
||||
temp += r[i];
|
||||
temp2 += r[i];
|
||||
temp2 += r[i];
|
||||
s[i] = temp;
|
||||
r[i] = temp2;
|
||||
temp >>= 32;
|
||||
temp2 >>= 32;
|
||||
}
|
||||
if (temp != 0 || temp2 != 0) {
|
||||
s[len2] = temp;
|
||||
r[len2] = temp2;
|
||||
len2++;
|
||||
}
|
||||
// note that
|
||||
// u'2^(k+1) = 2*u 2^k = x -(r + r) = x -r' mod prime
|
||||
// v'2^(k+1) = (v - u) 2^k = x (s + r) = x s' mod prime
|
||||
// u's' + v'r' = u(r+s) + (v-u)/2(2r) = us + vr
|
||||
}
|
||||
// adjust len1 if possible.
|
||||
if (u[len1 - 1] == 0 && v[len1 - 1] == 0) len1--;
|
||||
// increase k
|
||||
k++;
|
||||
}
|
||||
// In the last iteration just before the comparison and subtraction
|
||||
// we had u=1, v=1, s+r = prime, k <= 510, 2^k > max(s,r) >= prime/2
|
||||
// hence 0 <= r < prime and 255 <= k <= 510.
|
||||
//
|
||||
// Afterwards r is doubled, k is incremented by 1.
|
||||
// Hence 0 <= r < 2*prime and 256 <= k < 512.
|
||||
//
|
||||
// The invariants give us x*-r = 2^k mod prime,
|
||||
// hence r = -2^k * x^-1 mod prime.
|
||||
// We need to compute -r/2^k mod prime.
|
||||
|
||||
// convert r to bignum style
|
||||
j = r[0] >> 30;
|
||||
r[0] = r[0] & 0x3FFFFFFFu;
|
||||
for (i = 1; i < len2; i++) {
|
||||
uint32_t q = r[i] >> (30 - 2 * i);
|
||||
r[i] = ((r[i] << (2 * i)) & 0x3FFFFFFFu) + j;
|
||||
j=q;
|
||||
}
|
||||
r[i] = j;
|
||||
i++;
|
||||
for (; i < 9; i++) r[i] = 0;
|
||||
|
||||
// r = r mod prime, note that r<2*prime.
|
||||
i = 8;
|
||||
while (i > 0 && r[i] == prime->val[i]) i--;
|
||||
if (r[i] >= prime->val[i]) {
|
||||
temp32 = 1;
|
||||
for (i = 0; i < 9; i++) {
|
||||
temp32 += 0x3FFFFFFF + r[i] - prime->val[i];
|
||||
r[i] = temp32 & 0x3FFFFFFF;
|
||||
temp32 >>= 30;
|
||||
}
|
||||
}
|
||||
// negate r: r = prime - r
|
||||
temp32 = 1;
|
||||
for (i = 0; i < 9; i++) {
|
||||
temp32 += 0x3FFFFFFF + prime->val[i] - r[i];
|
||||
r[i] = temp32 & 0x3FFFFFFF;
|
||||
temp32 >>= 30;
|
||||
}
|
||||
// now: r = 2^k * x^-1 mod prime
|
||||
// compute r/2^k, 256 <= k < 511
|
||||
int done = 0;
|
||||
#if USE_PRECOMPUTED_IV
|
||||
if (prime == &prime256k1) {
|
||||
for (j = 0; j < 9; j++) {
|
||||
x->val[j] = r[j];
|
||||
}
|
||||
// secp256k1_iv[k-256] = 2^-k mod prime
|
||||
bn_multiply(secp256k1_iv + k - 256, x, prime);
|
||||
// bn_fast_mod is unnecessary as bn_multiply already
|
||||
// guarantees x < 2*prime
|
||||
bn_fast_mod(x, prime);
|
||||
// We don't guarantee x < prime!
|
||||
// the slow variant and the slow case below guarantee
|
||||
// this.
|
||||
done = 1;
|
||||
}
|
||||
#endif
|
||||
if (!done) {
|
||||
// compute r = r/2^k mod prime
|
||||
for (j = 0; j < k; j++) {
|
||||
// invariant: r = 2^(k-j) * x^-1 mod prime
|
||||
// in each iteration divide r by 2 modulo prime.
|
||||
if (r[0] & 1) {
|
||||
// r is odd; compute r = (prime + r)/2
|
||||
temp32 = r[0] + prime->val[0];
|
||||
r[0] = (temp32 >> 1) & 0x1FFFFFFF;
|
||||
temp32 >>= 30;
|
||||
for (i = 1; i < 9; i++) {
|
||||
temp32 += r[i] + prime->val[i];
|
||||
r[i - 1] += (temp32 & 1) << 29;
|
||||
r[i] = (temp32 >> 1) & 0x1FFFFFFF;
|
||||
temp32 >>= 30;
|
||||
}
|
||||
even->a[i] = (even->a[i] >> j);
|
||||
if (even->a[i] == 0) {
|
||||
even->len1--;
|
||||
} else {
|
||||
// r = r / 2
|
||||
for (i = 0; i < 8; i++) {
|
||||
r[i] = (r[i] >> 1) | ((r[i + 1] & 1) << 29);
|
||||
}
|
||||
r[8] = r[8] >> 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
// shift second part of even left by j bits.
|
||||
for (; i < 8; i++) {
|
||||
even->a[i] = (even->a[i] << j) | (even->a[i + 1] >> (32 - j));
|
||||
}
|
||||
even->a[i] = (even->a[i] << j);
|
||||
// add j bits to k.
|
||||
k += j;
|
||||
}
|
||||
// r = x^-1 mod prime, since j = k
|
||||
for (j = 0; j < 9; j++) {
|
||||
x->val[j] = r[j];
|
||||
// invariant is reestablished.
|
||||
// now both a[0] are odd.
|
||||
assert(odd->a[0] & 1);
|
||||
assert(odd->a[8] & 1);
|
||||
assert(even->a[0] & 1);
|
||||
assert((even->a[8] & 1) == 0);
|
||||
|
||||
// cmp > 0 if us.a[0..len1-1] > vr.a[0..len1-1],
|
||||
// cmp = 0 if equal, < 0 if less.
|
||||
cmp = us.len1 - vr.len1;
|
||||
if (cmp == 0) {
|
||||
i = us.len1 - 1;
|
||||
while (i >= 0 && us.a[i] == vr.a[i]) i--;
|
||||
// both are equal to 1 and we are done.
|
||||
if (i == -1)
|
||||
break;
|
||||
cmp = us.a[i] > vr.a[i] ? 1 : -1;
|
||||
}
|
||||
if (cmp > 0) {
|
||||
even = &us;
|
||||
odd = &vr;
|
||||
} else {
|
||||
even = &vr;
|
||||
odd = &us;
|
||||
}
|
||||
|
||||
// now even > odd.
|
||||
|
||||
// even->a[0..len1-1] = (even->a[0..len1-1] - odd->a[0..len1-1]);
|
||||
temp = 1;
|
||||
for (i = 0; i < odd->len1; i++) {
|
||||
temp += 0xFFFFFFFFull + even->a[i] - odd->a[i];
|
||||
even->a[i] = temp & 0xFFFFFFFF;
|
||||
temp >>= 32;
|
||||
}
|
||||
for (; i < even->len1; i++) {
|
||||
temp += 0xFFFFFFFFull + even->a[i];
|
||||
even->a[i] = temp & 0xFFFFFFFF;
|
||||
temp >>= 32;
|
||||
}
|
||||
// odd->a[len1..8] = (odd->a[len1..8] + even->a[len1..8]);
|
||||
temp = 0;
|
||||
for (i = 8; i >= even->len1; i--) {
|
||||
temp += (uint64_t) odd->a[i] + even->a[i];
|
||||
odd->a[i] = temp & 0xFFFFFFFF;
|
||||
temp >>= 32;
|
||||
}
|
||||
for (; i >= odd->len1; i--) {
|
||||
temp += (uint64_t) odd->a[i];
|
||||
odd->a[i] = temp & 0xFFFFFFFF;
|
||||
temp >>= 32;
|
||||
}
|
||||
// note that
|
||||
// if u > v:
|
||||
// u'2^k = (u - v) 2^k = x(-r) - xs = x(-(r+s)) = x(-r') mod prime
|
||||
// u's' + v'r' = (u-v)s + v(r+s) = us + vr
|
||||
// if u < v:
|
||||
// v'2^k = (v - u) 2^k = xs - x(-r) = x(s+r) = xs' mod prime
|
||||
// u's' + v'r' = u(s+r) + (v-u)r = us + vr
|
||||
|
||||
// even->a[0] is difference between two odd numbers, hence even.
|
||||
// odd->a[8] is sum of even and odd number, hence odd.
|
||||
assert(odd->a[0] & 1);
|
||||
assert(odd->a[8] & 1);
|
||||
assert((even->a[0] & 1) == 0);
|
||||
|
||||
// The invariants are (almost) reestablished.
|
||||
// The invariant max(u,v) <= 2^k can be invalidated at this point,
|
||||
// because odd->a[len1..8] was changed. We only have
|
||||
//
|
||||
// odd->a[len1..8] <= 2^{k+1}
|
||||
//
|
||||
// Since even->a[0] is even, k will be incremented at the beginning
|
||||
// of the next loop while odd->a[len1..8] remains unchanged.
|
||||
// So after that, odd->a[len1..8] <= 2^k will hold again.
|
||||
}
|
||||
// In the last iteration we had u = v and gcd(u,v) = 1.
|
||||
// Hence, u=1, v=1, s+r = prime, k <= 510, 2^k > max(s,r) >= prime/2
|
||||
// This implies 0 <= s < prime and 255 <= k <= 510.
|
||||
//
|
||||
// The invariants also give us x*s = 2^k mod prime,
|
||||
// hence s = 2^k * x^-1 mod prime.
|
||||
// We need to compute s/2^k mod prime.
|
||||
|
||||
// First we compute inverse = -prime^-1 mod 2^32, which we need later.
|
||||
// We use the Explicit Quadratic Modular inverse algorithm.
|
||||
// http://arxiv.org/pdf/1209.6626.pdf
|
||||
// a^-1 = (2-a) * PROD_i (1 + (a - 1)^(2^i)) mod 2^32
|
||||
// the product will converge quickly, because (a-1)^(2^i) will be
|
||||
// zero mod 2^32 after at most five iterations.
|
||||
// We want to compute -prime^-1 so we start with (pp[0]-2).
|
||||
assert(pp[0] & 1);
|
||||
uint32_t amone = pp[0]-1;
|
||||
uint32_t inverse = pp[0] - 2;
|
||||
while (amone) {
|
||||
amone *= amone;
|
||||
inverse *= (amone + 1);
|
||||
}
|
||||
|
||||
while (k >= 32) {
|
||||
// compute s / 2^32 modulo prime.
|
||||
// Idea: compute factor, such that
|
||||
// s + factor*prime mod 2^32 == 0
|
||||
// i.e. factor = s * -1/prime mod 2^32.
|
||||
// Then compute s + factor*prime and shift right by 32 bits.
|
||||
uint32_t factor = (inverse * us.a[8]) & 0xffffffff;
|
||||
temp = us.a[8] + (uint64_t) pp[0] * factor;
|
||||
assert((temp & 0xffffffff) == 0);
|
||||
temp >>= 32;
|
||||
for (i = 0; i < 7; i++) {
|
||||
temp += us.a[8-(i+1)] + (uint64_t) pp[i+1] * factor;
|
||||
us.a[8-i] = temp & 0xffffffff;
|
||||
temp >>= 32;
|
||||
}
|
||||
us.a[8-i] = temp & 0xffffffff;
|
||||
k -= 32;
|
||||
}
|
||||
if (k > 0) {
|
||||
// compute s / 2^k modulo prime.
|
||||
// Same idea: compute factor, such that
|
||||
// s + factor*prime mod 2^k == 0
|
||||
// i.e. factor = s * -1/prime mod 2^k.
|
||||
// Then compute s + factor*prime and shift right by k bits.
|
||||
uint32_t mask = (1 << k) - 1;
|
||||
uint32_t factor = (inverse * us.a[8]) & mask;
|
||||
temp = (us.a[8] + (uint64_t) pp[0] * factor) >> k;
|
||||
assert(((us.a[8] + pp[0] * factor) & mask) == 0);
|
||||
for (i = 0; i < 7; i++) {
|
||||
temp += (us.a[8-(i+1)] + (uint64_t) pp[i+1] * factor) << (32 - k);
|
||||
us.a[8-i] = temp & 0xffffffff;
|
||||
temp >>= 32;
|
||||
}
|
||||
us.a[8-i] = temp & 0xffffffff;
|
||||
}
|
||||
|
||||
// convert s to bignum style
|
||||
temp32 = 0;
|
||||
for (i = 0; i < 8; i++) {
|
||||
x->val[i] = ((us.a[8-i] << (2 * i)) & 0x3FFFFFFFu) | temp32;
|
||||
temp32 = us.a[8-i] >> (30 - 2 * i);
|
||||
}
|
||||
x->val[i] = temp32;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -632,14 +645,15 @@ void bn_addmodi(bignum256 *a, uint32_t b, const bignum256 *prime) {
|
||||
bn_mod(a, prime);
|
||||
}
|
||||
|
||||
// res = a - b
|
||||
// b < 2*prime; result not normalized
|
||||
void bn_subtractmod(const bignum256 *a, const bignum256 *b, bignum256 *res)
|
||||
// res = a - b mod prime. More exactly res = a + (2*prime - b).
|
||||
// precondition: 0 <= b < 2*prime, 0 <= a < prime
|
||||
// res < 3*prime
|
||||
void bn_subtractmod(const bignum256 *a, const bignum256 *b, bignum256 *res, const bignum256 *prime)
|
||||
{
|
||||
int i;
|
||||
uint32_t temp = 0;
|
||||
for (i = 0; i < 9; i++) {
|
||||
temp += a->val[i] + 2u * prime256k1.val[i] - b->val[i];
|
||||
temp += a->val[i] + 2u * prime->val[i] - b->val[i];
|
||||
res->val[i] = temp & 0x3FFFFFFF;
|
||||
temp >>= 30;
|
||||
}
|
||||
@ -665,9 +679,17 @@ void bn_divmod58(bignum256 *a, uint32_t *r)
|
||||
rem = a->val[8] % 58;
|
||||
a->val[8] /= 58;
|
||||
for (i = 7; i >= 0; i--) {
|
||||
// invariants:
|
||||
// rem = old(a) >> 30(i+1) % 58
|
||||
// a[i+1..8] = old(a[i+1..8])/58
|
||||
// a[0..i] = old(a[0..i])
|
||||
// 2^30 == 18512790*58 + 4
|
||||
tmp = rem * 4 + a->val[i];
|
||||
// set a[i] = (rem * 2^30 + a[i])/58
|
||||
// = rem * 18512790 + (rem * 4 + a[i])/58
|
||||
a->val[i] = rem * 18512790 + (tmp / 58);
|
||||
// set rem = (rem * 2^30 + a[i]) mod 58
|
||||
// = (rem * 4 + a[i]) mod 58
|
||||
rem = tmp % 58;
|
||||
}
|
||||
*r = rem;
|
||||
|
4
bignum.h
4
bignum.h
@ -57,6 +57,8 @@ void bn_lshift(bignum256 *a);
|
||||
|
||||
void bn_rshift(bignum256 *a);
|
||||
|
||||
void bn_mult_3_2(bignum256 *x, const bignum256 *prime);
|
||||
|
||||
void bn_mod(bignum256 *x, const bignum256 *prime);
|
||||
|
||||
void bn_multiply(const bignum256 *k, bignum256 *x, const bignum256 *prime);
|
||||
@ -73,7 +75,7 @@ void bn_addmod(bignum256 *a, const bignum256 *b, const bignum256 *prime);
|
||||
|
||||
void bn_addmodi(bignum256 *a, uint32_t b, const bignum256 *prime);
|
||||
|
||||
void bn_subtractmod(const bignum256 *a, const bignum256 *b, bignum256 *res);
|
||||
void bn_subtractmod(const bignum256 *a, const bignum256 *b, bignum256 *res, const bignum256 *prime);
|
||||
|
||||
void bn_subtract(const bignum256 *a, const bignum256 *b, bignum256 *res);
|
||||
|
||||
|
557
ecdsa.c
557
ecdsa.c
@ -24,6 +24,7 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "bignum.h"
|
||||
#include "rand.h"
|
||||
@ -36,8 +37,7 @@
|
||||
// Set cp2 = cp1
|
||||
void point_copy(const curve_point *cp1, curve_point *cp2)
|
||||
{
|
||||
memcpy(&(cp2->x), &(cp1->x), sizeof(bignum256));
|
||||
memcpy(&(cp2->y), &(cp1->y), sizeof(bignum256));
|
||||
*cp2 = *cp1;
|
||||
}
|
||||
|
||||
// cp2 = cp1 + cp2
|
||||
@ -63,29 +63,32 @@ void point_add(const curve_point *cp1, curve_point *cp2)
|
||||
return;
|
||||
}
|
||||
|
||||
bn_subtractmod(&(cp2->x), &(cp1->x), &inv);
|
||||
bn_subtractmod(&(cp2->x), &(cp1->x), &inv, &prime256k1);
|
||||
bn_inverse(&inv, &prime256k1);
|
||||
bn_subtractmod(&(cp2->y), &(cp1->y), &lambda);
|
||||
bn_subtractmod(&(cp2->y), &(cp1->y), &lambda, &prime256k1);
|
||||
bn_multiply(&inv, &lambda, &prime256k1);
|
||||
memcpy(&xr, &lambda, sizeof(bignum256));
|
||||
|
||||
// xr = lambda^2 - x1 - x2
|
||||
xr = lambda;
|
||||
bn_multiply(&xr, &xr, &prime256k1);
|
||||
temp = 0;
|
||||
temp = 1;
|
||||
for (i = 0; i < 9; i++) {
|
||||
temp += xr.val[i] + 3u * prime256k1.val[i] - cp1->x.val[i] - cp2->x.val[i];
|
||||
temp += 0x3FFFFFFF + xr.val[i] + 2u * prime256k1.val[i] - cp1->x.val[i] - cp2->x.val[i];
|
||||
xr.val[i] = temp & 0x3FFFFFFF;
|
||||
temp >>= 30;
|
||||
}
|
||||
bn_fast_mod(&xr, &prime256k1);
|
||||
bn_subtractmod(&(cp1->x), &xr, &yr);
|
||||
// no need to fast_mod here
|
||||
// bn_fast_mod(&yr);
|
||||
bn_mod(&xr, &prime256k1);
|
||||
|
||||
// yr = lambda (x1 - xr) - y1
|
||||
bn_subtractmod(&(cp1->x), &xr, &yr, &prime256k1);
|
||||
bn_multiply(&lambda, &yr, &prime256k1);
|
||||
bn_subtractmod(&yr, &(cp1->y), &yr);
|
||||
bn_subtractmod(&yr, &(cp1->y), &yr, &prime256k1);
|
||||
bn_fast_mod(&yr, &prime256k1);
|
||||
memcpy(&(cp2->x), &xr, sizeof(bignum256));
|
||||
memcpy(&(cp2->y), &yr, sizeof(bignum256));
|
||||
bn_mod(&(cp2->x), &prime256k1);
|
||||
bn_mod(&(cp2->y), &prime256k1);
|
||||
bn_mod(&yr, &prime256k1);
|
||||
|
||||
cp2->x = xr;
|
||||
cp2->y = yr;
|
||||
}
|
||||
|
||||
// cp = cp + cp
|
||||
@ -93,7 +96,7 @@ void point_double(curve_point *cp)
|
||||
{
|
||||
int i;
|
||||
uint32_t temp;
|
||||
bignum256 lambda, inverse_y, xr, yr;
|
||||
bignum256 lambda, xr, yr;
|
||||
|
||||
if (point_is_infinity(cp)) {
|
||||
return;
|
||||
@ -103,56 +106,34 @@ void point_double(curve_point *cp)
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(&inverse_y, &(cp->y), sizeof(bignum256));
|
||||
bn_inverse(&inverse_y, &prime256k1);
|
||||
memcpy(&lambda, &three_over_two256k1, sizeof(bignum256));
|
||||
bn_multiply(&inverse_y, &lambda, &prime256k1);
|
||||
bn_multiply(&(cp->x), &lambda, &prime256k1);
|
||||
bn_multiply(&(cp->x), &lambda, &prime256k1);
|
||||
memcpy(&xr, &lambda, sizeof(bignum256));
|
||||
// lambda = 3/2 x^2 / y
|
||||
lambda = cp->y;
|
||||
bn_inverse(&lambda, &prime256k1);
|
||||
bn_multiply(&cp->x, &lambda, &prime256k1);
|
||||
bn_multiply(&cp->x, &lambda, &prime256k1);
|
||||
bn_mult_3_2(&lambda, &prime256k1);
|
||||
|
||||
// xr = lambda^2 - 2*x
|
||||
xr = lambda;
|
||||
bn_multiply(&xr, &xr, &prime256k1);
|
||||
temp = 0;
|
||||
temp = 1;
|
||||
for (i = 0; i < 9; i++) {
|
||||
temp += xr.val[i] + 3u * prime256k1.val[i] - 2u * cp->x.val[i];
|
||||
temp += 0x3FFFFFFF + xr.val[i] + 2u * (prime256k1.val[i] - cp->x.val[i]);
|
||||
xr.val[i] = temp & 0x3FFFFFFF;
|
||||
temp >>= 30;
|
||||
}
|
||||
bn_fast_mod(&xr, &prime256k1);
|
||||
bn_subtractmod(&(cp->x), &xr, &yr);
|
||||
// no need to fast_mod here
|
||||
// bn_fast_mod(&yr);
|
||||
bn_multiply(&lambda, &yr, &prime256k1);
|
||||
bn_subtractmod(&yr, &(cp->y), &yr);
|
||||
bn_fast_mod(&yr, &prime256k1);
|
||||
memcpy(&(cp->x), &xr, sizeof(bignum256));
|
||||
memcpy(&(cp->y), &yr, sizeof(bignum256));
|
||||
bn_mod(&(cp->x), &prime256k1);
|
||||
bn_mod(&(cp->y), &prime256k1);
|
||||
}
|
||||
bn_mod(&xr, &prime256k1);
|
||||
|
||||
// res = k * p
|
||||
void point_multiply(const bignum256 *k, const curve_point *p, curve_point *res)
|
||||
{
|
||||
int i, j;
|
||||
// result is zero
|
||||
int is_zero = 1;
|
||||
curve_point curr;
|
||||
// initial res
|
||||
memcpy(&curr, p, sizeof(curve_point));
|
||||
for (i = 0; i < 9; i++) {
|
||||
for (j = 0; j < 30; j++) {
|
||||
if (i == 8 && (k->val[i] >> j) == 0) break;
|
||||
if (k->val[i] & (1u << j)) {
|
||||
if (is_zero) {
|
||||
memcpy(res, &curr, sizeof(curve_point));
|
||||
is_zero = 0;
|
||||
} else {
|
||||
point_add(&curr, res);
|
||||
}
|
||||
}
|
||||
point_double(&curr);
|
||||
}
|
||||
}
|
||||
// yr = lambda (x - xr) - y
|
||||
bn_subtractmod(&(cp->x), &xr, &yr, &prime256k1);
|
||||
bn_multiply(&lambda, &yr, &prime256k1);
|
||||
bn_subtractmod(&yr, &(cp->y), &yr, &prime256k1);
|
||||
bn_fast_mod(&yr, &prime256k1);
|
||||
bn_mod(&yr, &prime256k1);
|
||||
|
||||
cp->x = xr;
|
||||
cp->y = yr;
|
||||
}
|
||||
|
||||
// set point to internal representation of point at infinity
|
||||
@ -192,48 +173,421 @@ int point_is_negative_of(const curve_point *p, const curve_point *q)
|
||||
return !bn_is_equal(&(p->y), &(q->y));
|
||||
}
|
||||
|
||||
// Negate a (modulo prime) if cond is 0xffffffff, keep it if cond is 0.
|
||||
// The timing of this function does not depend on cond.
|
||||
static void conditional_negate(uint32_t cond, bignum256 *a, const bignum256 *prime)
|
||||
{
|
||||
int j;
|
||||
// tmp accumulates (prime - a) limb by limb. The initial 1 plus the
// 0x3fffffff added per limb form a bias of 2^30, so the 32-bit
// subtraction never goes negative; after ">>= 30" tmp is 1 when no
// borrow occurred and 0 when one did, which restores the bias (or
// applies the borrow) for the next limb.
uint32_t tmp = 1;
|
||||
for (j = 0; j < 8; j++) {
|
||||
tmp += 0x3fffffff + prime->val[j] - a->val[j];
|
||||
// select the negated limb where cond has bits set and the original
// limb where it does not; both are always computed, so the work done
// is the same for either value of cond.
a->val[j] = ((tmp & 0x3fffffff) & cond) | (a->val[j] & ~cond);
|
||||
tmp >>= 30;
|
||||
}
|
||||
// top limb (j == 8 here): same computation, but no carry is needed afterwards.
tmp += 0x3fffffff + prime->val[j] - a->val[j];
|
||||
a->val[j] = ((tmp & 0x3fffffff) & cond) | (a->val[j] & ~cond);
|
||||
}
|
||||
|
||||
typedef struct jacobian_curve_point {
|
||||
bignum256 x, y, z;
|
||||
} jacobian_curve_point;
|
||||
|
||||
// Convert the affine point p into the Jacobian point jp, using a freshly
// drawn z coordinate: jp->x = p->x * z^2 and jp->y = p->y * z^3, both taken
// modulo prime256k1. p is not modified.
static void curve_to_jacobian(const curve_point *p, jacobian_curve_point *jp) {
|
||||
int i;
|
||||
// randomize z coordinate
|
||||
// (random32() is defined elsewhere; presumably it returns 32 random bits)
for (i = 0; i < 8; i++) {
|
||||
// keep only 30 bits, the width of one bignum256 limb
jp->z.val[i] = random32() & 0x3FFFFFFF;
|
||||
}
|
||||
// the top limb is in the range 1..0x8000, hence z is never zero
jp->z.val[8] = (random32() & 0x7fff) + 1;
|
||||
|
||||
jp->x = jp->z;
|
||||
bn_multiply(&jp->z, &jp->x, &prime256k1);
|
||||
// x = z^2
|
||||
jp->y = jp->x;
|
||||
bn_multiply(&jp->z, &jp->y, &prime256k1);
|
||||
// y = z^3
|
||||
|
||||
// scale the affine coordinates: x = p->x * z^2, y = p->y * z^3
bn_multiply(&p->x, &jp->x, &prime256k1);
|
||||
bn_multiply(&p->y, &jp->y, &prime256k1);
|
||||
// final reduction of both coordinates with bn_mod
bn_mod(&jp->x, &prime256k1);
|
||||
bn_mod(&jp->y, &prime256k1);
|
||||
}
|
||||
|
||||
// Convert the Jacobian point jp back to the affine point p:
// p->x = jp->x * z^-2 and p->y = jp->y * z^-3 modulo prime256k1.
// jp is not modified. p->y doubles as scratch space for z^-1 until the
// final multiplications. jp->z is passed to bn_inverse, so it presumably
// must not be 0 mod prime - confirm against bn_inverse's contract.
static void jacobian_to_curve(const jacobian_curve_point *jp, curve_point *p) {
|
||||
p->y = jp->z;
|
||||
// reduce z before inverting it
bn_mod(&p->y, &prime256k1);
|
||||
bn_inverse(&p->y, &prime256k1);
|
||||
// p->y = z^-1
|
||||
p->x = p->y;
|
||||
bn_multiply(&p->x, &p->x, &prime256k1);
|
||||
// p->x = z^-2
|
||||
bn_multiply(&p->x, &p->y, &prime256k1);
|
||||
// p->y = z^-3
|
||||
bn_multiply(&jp->x, &p->x, &prime256k1);
|
||||
// p->x = jp->x * z^-2
|
||||
bn_multiply(&jp->y, &p->y, &prime256k1);
|
||||
// p->y = jp->y * z^-3
|
||||
bn_mod(&p->x, &prime256k1);
|
||||
bn_mod(&p->y, &prime256k1);
|
||||
}
|
||||
|
||||
// Add the affine point p1 to the Jacobian point p2 in place (p2 = p1 + p2),
// working modulo prime256k1. In the formulas below (x1, y1) are the
// coordinates of p1 and (x2, y2, z2) those of p2 on entry; (x3, y3, z3) are
// written back to p2. No inversion is performed.
// NOTE(review): no handling of h == 0 (p1 and p2 sharing an x coordinate) or
// of the point at infinity is visible in this function - presumably the
// caller rules these out; confirm.
static void point_jacobian_add(const curve_point *p1, jacobian_curve_point *p2) {
|
||||
bignum256 r, h;
|
||||
bignum256 rsq, hcb, hcby2, hsqx2;
|
||||
int j;
|
||||
uint64_t tmp1;
|
||||
|
||||
/* usual algorithm:
|
||||
*
|
||||
* lambda = (y1 - y2/z2^3) / (x1 - x2/z2^2)
|
||||
* x3/z3^2 = lambda^2 - x1 - x2/z2^2
|
||||
* y3/z3^3 = lambda * (x2/z2^2 - x3/z3^2) - y2/z2^3
|
||||
*
|
||||
* to get rid of fraction we set
|
||||
* r = (y1 * z2^3 - y2) (the numerator of lambda * z2^3)
|
||||
* h = (x1 * z2^2 - x2) (the denominator of lambda * z2^2)
|
||||
* Hence,
|
||||
* lambda = r / (h*z2)
|
||||
*
|
||||
* With z3 = h*z2 (the denominator of lambda)
|
||||
* we get x3 = lambda^2*z3^2 - x1*z3^2 - x2/z2^2*z3^2
|
||||
* = r^2 - x1*h^2*z2^2 - x2*h^2
|
||||
* = r^2 - h^2*(x1*z2^2 + x2)
|
||||
* = r^2 - h^2*(h + 2*x2)
|
||||
* = r^2 - h^3 - 2*h^2*x2
|
||||
* and y3 = (lambda * (x2/z2^2 - x3/z3^2) - y2/z2^3) * z3^3
|
||||
* = r * (h^2*x2 - x3) - h^3*y2
|
||||
*/
|
||||
|
||||
|
||||
/* h = x1*z2^2 - x2
|
||||
* r = y1*z2^3 - y2
|
||||
* x3 = r^2 - h^3 - 2*h^2*x2
|
||||
* y3 = r*(h^2*x2 - x3) - h^3*y2
|
||||
* z3 = h*z2
|
||||
*/
|
||||
|
||||
// h = x1 * z2^2 - x2;
|
||||
// r = y1 * z2^3 - y2;
|
||||
h = p2->z;
|
||||
bn_multiply(&h, &h, &prime256k1); // h = z2^2
|
||||
r = p2->z;
|
||||
bn_multiply(&h, &r, &prime256k1); // r = z2^3
|
||||
|
||||
bn_multiply(&p1->x, &h, &prime256k1);
|
||||
bn_subtractmod(&h, &p2->x, &h, &prime256k1);
|
||||
// h = x1 * z2^2 - x2;
|
||||
|
||||
bn_multiply(&p1->y, &r, &prime256k1);
|
||||
bn_subtractmod(&r, &p2->y, &r, &prime256k1);
|
||||
// r = y1 * z2^3 - y2;
|
||||
|
||||
// hsqx2 = h^2
|
||||
hsqx2 = h;
|
||||
bn_multiply(&hsqx2, &hsqx2, &prime256k1);
|
||||
|
||||
// hcb = h^3
|
||||
hcb = h;
|
||||
bn_multiply(&hsqx2, &hcb, &prime256k1);
|
||||
|
||||
// hsqx2 = h^2 * x2
|
||||
bn_multiply(&p2->x, &hsqx2, &prime256k1);
|
||||
|
||||
// hcby2 = h^3 * y2
|
||||
hcby2 = hcb;
|
||||
bn_multiply(&p2->y, &hcby2, &prime256k1);
|
||||
|
||||
// rsq = r^2
|
||||
rsq = r;
|
||||
bn_multiply(&rsq, &rsq, &prime256k1);
|
||||
|
||||
// z3 = h*z2
|
||||
// (p2->x and p2->y were already consumed above, so p2 may be overwritten from here on)
bn_multiply(&h, &p2->z, &prime256k1);
|
||||
bn_mod(&p2->z, &prime256k1);
|
||||
|
||||
// x3 = r^2 - h^3 - 2h^2x2
|
||||
// computed limb by limb in a 64-bit accumulator; 4*prime is added so the
// per-limb difference cannot become negative, and tmp1 >> 30 carries
// into the next limb.
tmp1 = 0;
|
||||
for (j = 0; j < 9; j++) {
|
||||
tmp1 += (uint64_t) rsq.val[j] + 4*prime256k1.val[j] - hcb.val[j] - 2*hsqx2.val[j];
|
||||
// sanity check: limb plus carry stays below 5 * 2^30
assert(tmp1 < 5 * 0x40000000ull);
|
||||
p2->x.val[j] = tmp1 & 0x3fffffff;
|
||||
tmp1 >>= 30;
|
||||
}
|
||||
// reduce the sum in two steps: bn_fast_mod first, then bn_mod
bn_fast_mod(&p2->x, &prime256k1);
|
||||
bn_mod(&p2->x, &prime256k1);
|
||||
|
||||
// y3 = r*(h^2x2 - x3) - y2*h^3
|
||||
bn_subtractmod(&hsqx2, &p2->x, &p2->y, &prime256k1);
|
||||
bn_multiply(&r, &p2->y, &prime256k1);
|
||||
bn_subtractmod(&p2->y, &hcby2, &p2->y, &prime256k1);
|
||||
bn_fast_mod(&p2->y, &prime256k1);
|
||||
bn_mod(&p2->y, &prime256k1);
|
||||
}
|
||||
|
||||
/*
 * Point doubling in Jacobian coordinates: p = 2*p, updated in place
 * (affine x = x/z^2, affine y = y/z^3).  All arithmetic is done modulo
 * prime256k1.
 *
 * The affine tangent formulas are
 *   lambda  = 3*(x/z^2)^2 / (2*y/z^3) = 3*x^2 / (2*y*z)
 *   x3/z3^2 = lambda^2 - 2*x/z^2
 *   y3/z3^3 = lambda * (x/z^2 - x3/z3^2) - y/z^3
 *
 * With m = 3/2 * x^2 we have lambda = m / (y*z).  Picking z3 = y*z
 * (the denominator of lambda) gives
 *   x3 = m^2 - 2*x*y^2
 *   y3 = m*(x*y^2 - x3) - y^4
 */
static void point_jacobian_double(jacobian_curve_point *p) {
	bignum256 m, m2, y2, xy2;
	uint32_t acc;

	// m = 3/2 * x^2
	m = p->x;
	bn_multiply(&m, &m, &prime256k1);
	bn_mult_3_2(&m, &prime256k1);

	// m2 = m^2
	m2 = m;
	bn_multiply(&m2, &m2, &prime256k1);

	// y2 = y^2
	y2 = p->y;
	bn_multiply(&y2, &y2, &prime256k1);

	// xy2 = x * y^2
	xy2 = p->x;
	bn_multiply(&y2, &xy2, &prime256k1);

	// y^2 is not needed any more; square it in place so y2 now holds y^4
	bn_multiply(&y2, &y2, &prime256k1);

	// z3 = y * z
	bn_multiply(&p->y, &p->z, &prime256k1);
	bn_mod(&p->z, &prime256k1);

	// x3 = m^2 - 2*x*y^2, evaluated limb by limb (30 bits per limb).
	// 3*prime256k1 is added in; that does not change the value modulo the
	// prime.
	acc = 0;
	for (int limb = 0; limb < 9; limb++) {
		acc += m2.val[limb] + 3*prime256k1.val[limb] - 2*xy2.val[limb];
		p->x.val[limb] = acc & 0x3fffffff;
		acc >>= 30;
	}
	bn_fast_mod(&p->x, &prime256k1);
	bn_mod(&p->x, &prime256k1);

	// y3 = m*(x*y^2 - x3) - y^4
	bn_subtractmod(&xy2, &p->x, &p->y, &prime256k1);
	bn_multiply(&m, &p->y, &prime256k1);
	bn_subtractmod(&p->y, &y2, &p->y, &prime256k1);
	bn_fast_mod(&p->y, &prime256k1);
	bn_mod(&p->y, &prime256k1);
}
|
||||
|
||||
// res = k * p
|
||||
void point_multiply(const bignum256 *k, const curve_point *p, curve_point *res)
|
||||
{
|
||||
// this algorithm is loosely based on
|
||||
// Katsuyuki Okeya and Tsuyoshi Takagi, The Width-w NAF Method Provides
|
||||
// Small Memory and Fast Elliptic Scalar Multiplications Secure against
|
||||
// Side Channel Attacks.
|
||||
assert (bn_is_less(k, &order256k1));
|
||||
|
||||
int i, j;
|
||||
int pos, shift;
|
||||
bignum256 a;
|
||||
uint32_t is_even = (k->val[0] & 1) - 1;
|
||||
uint32_t bits, sign, nsign;
|
||||
jacobian_curve_point jres;
|
||||
curve_point pmult[8];
|
||||
|
||||
// is_even = 0xffffffff if k is even, 0 otherwise.
|
||||
|
||||
// add 2^256.
|
||||
// make number odd: subtract order256k1 if even
|
||||
uint32_t tmp = 1;
|
||||
uint32_t is_non_zero = 0;
|
||||
for (j = 0; j < 8; j++) {
|
||||
is_non_zero |= k->val[j];
|
||||
tmp += 0x3fffffff + k->val[j] - (order256k1.val[j] & is_even);
|
||||
a.val[j] = tmp & 0x3fffffff;
|
||||
tmp >>= 30;
|
||||
}
|
||||
is_non_zero |= k->val[j];
|
||||
a.val[j] = tmp + 0xffff + k->val[j] - (order256k1.val[j] & is_even);
|
||||
assert((a.val[0] & 1) != 0);
|
||||
|
||||
// special case 0*p: just return zero. We don't care about constant time.
|
||||
if (!is_non_zero) {
|
||||
point_set_infinity(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Now a = k + 2^256 (mod order256k1) and a is odd.
|
||||
//
|
||||
// The idea is to bring the new a into the form.
|
||||
// sum_{i=0..64} a[i] 16^i, where |a[i]| < 16 and a[i] is odd.
|
||||
// a[0] is odd, since a is odd. If a[i] would be even, we can
|
||||
// add 1 to it and subtract 16 from a[i-1]. Afterwards,
|
||||
// a[64] = 1, which is the 2^256 that we added before.
|
||||
//
|
||||
// Since k = a - 2^256 (mod order256k1), we can compute
|
||||
// k*p = sum_{i=0..63} a[i] 16^i * p
|
||||
//
|
||||
// We compute |a[i]| * p in advance for all possible
|
||||
// values of |a[i]| * p. pmult[i] = (2*i+1) * p
|
||||
// We compute p, 3*p, ..., 15*p and store it in the table pmult.
|
||||
// store p^2 temporarily in pmult[7]
|
||||
pmult[7] = *p;
|
||||
point_double(&pmult[7]);
|
||||
// compute 3*p, etc by repeatedly adding p^2.
|
||||
pmult[0] = *p;
|
||||
for (i = 1; i < 8; i++) {
|
||||
pmult[i] = pmult[7];
|
||||
point_add(&pmult[i-1], &pmult[i]);
|
||||
}
|
||||
|
||||
// now compute res = sum_{i=0..63} a[i] * 16^i * p step by step,
|
||||
// starting with i = 63.
|
||||
// initialize jres = |a[63]| * p.
|
||||
// Note that a[i] = a>>(4*i) & 0xf if (a&0x10) != 0
|
||||
// and - (16 - (a>>(4*i) & 0xf)) otherwise. We can compute this as
|
||||
// ((a ^ (((a >> 4) & 1) - 1)) & 0xf) >> 1
|
||||
// since a is odd.
|
||||
bits = a.val[8] >> 12;
|
||||
sign = (bits >> 4) - 1;
|
||||
bits ^= sign;
|
||||
bits &= 15;
|
||||
curve_to_jacobian(&pmult[bits>>1], &jres);
|
||||
for (i = 62; i >= 0; i--) {
|
||||
// sign = sign(a[i+1]) (0xffffffff for negative, 0 for positive)
|
||||
// invariant jres = (-1)^sign sum_{j=i+1..63} (a[j] * 16^{j-i-1} * p)
|
||||
|
||||
point_jacobian_double(&jres);
|
||||
point_jacobian_double(&jres);
|
||||
point_jacobian_double(&jres);
|
||||
point_jacobian_double(&jres);
|
||||
|
||||
// get lowest 5 bits of a >> (i*4).
|
||||
pos = i*4/30; shift = i*4 % 30;
|
||||
bits = (a.val[pos+1]<<(30-shift) | a.val[pos] >> shift) & 31;
|
||||
nsign = (bits >> 4) - 1;
|
||||
bits ^= nsign;
|
||||
bits &= 15;
|
||||
|
||||
// negate last result to make signs of this round and the
|
||||
// last round equal.
|
||||
conditional_negate(sign ^ nsign, &jres.z, &prime256k1);
|
||||
|
||||
// add odd factor
|
||||
point_jacobian_add(&pmult[bits >> 1], &jres);
|
||||
sign = nsign;
|
||||
}
|
||||
conditional_negate(sign, &jres.z, &prime256k1);
|
||||
jacobian_to_curve(&jres, res);
|
||||
}
|
||||
|
||||
#if USE_PRECOMPUTED_CP
|
||||
|
||||
// res = k * G
|
||||
// k must be a normalized number with 0 <= k < order256k1
|
||||
void scalar_multiply(const bignum256 *k, curve_point *res)
|
||||
{
|
||||
int i;
|
||||
// result is zero
|
||||
int is_zero = 1;
|
||||
curve_point curr;
|
||||
// initial res
|
||||
memcpy(&curr, &G256k1, sizeof(curve_point));
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (k->val[i / 30] & (1u << (i % 30))) {
|
||||
if (is_zero) {
|
||||
#if USE_PRECOMPUTED_CP
|
||||
if (i < 255 && (k->val[(i + 1) / 30] & (1u << ((i + 1) % 30)))) {
|
||||
memcpy(res, secp256k1_cp2 + i, sizeof(curve_point));
|
||||
i++;
|
||||
} else {
|
||||
memcpy(res, secp256k1_cp + i, sizeof(curve_point));
|
||||
}
|
||||
#else
|
||||
memcpy(res, &curr, sizeof(curve_point));
|
||||
#endif
|
||||
is_zero = 0;
|
||||
} else {
|
||||
#if USE_PRECOMPUTED_CP
|
||||
if (i < 255 && (k->val[(i + 1) / 30] & (1u << ((i + 1) % 30)))) {
|
||||
point_add(secp256k1_cp2 + i, res);
|
||||
i++;
|
||||
} else {
|
||||
point_add(secp256k1_cp + i, res);
|
||||
}
|
||||
#else
|
||||
point_add(&curr, res);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if ! USE_PRECOMPUTED_CP
|
||||
point_double(&curr);
|
||||
#endif
|
||||
assert (bn_is_less(k, &order256k1));
|
||||
|
||||
int i, j;
|
||||
bignum256 a;
|
||||
uint32_t is_even = (k->val[0] & 1) - 1;
|
||||
uint32_t lowbits;
|
||||
jacobian_curve_point jres;
|
||||
|
||||
// is_even = 0xffffffff if k is even, 0 otherwise.
|
||||
|
||||
// add 2^256.
|
||||
// make number odd: subtract order256k1 if even
|
||||
uint32_t tmp = 1;
|
||||
uint32_t is_non_zero = 0;
|
||||
for (j = 0; j < 8; j++) {
|
||||
is_non_zero |= k->val[j];
|
||||
tmp += 0x3fffffff + k->val[j] - (order256k1.val[j] & is_even);
|
||||
a.val[j] = tmp & 0x3fffffff;
|
||||
tmp >>= 30;
|
||||
}
|
||||
is_non_zero |= k->val[j];
|
||||
a.val[j] = tmp + 0xffff + k->val[j] - (order256k1.val[j] & is_even);
|
||||
assert((a.val[0] & 1) != 0);
|
||||
|
||||
// special case 0*G: just return zero. We don't care about constant time.
|
||||
if (!is_non_zero) {
|
||||
point_set_infinity(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Now a = k + 2^256 (mod order256k1) and a is odd.
|
||||
//
|
||||
// The idea is to bring the new a into the form.
|
||||
// sum_{i=0..64} a[i] 16^i, where |a[i]| < 16 and a[i] is odd.
|
||||
// a[0] is odd, since a is odd. If a[i] would be even, we can
|
||||
// add 1 to it and subtract 16 from a[i-1]. Afterwards,
|
||||
// a[64] = 1, which is the 2^256 that we added before.
|
||||
//
|
||||
// Since k = a - 2^256 (mod order256k1), we can compute
|
||||
// k*G = sum_{i=0..63} a[i] 16^i * G
|
||||
//
|
||||
// We have a big table secp256k1_cp that stores all possible
|
||||
// values of |a[i]| 16^i * G.
|
||||
// secp256k1_cp[i][j] = (2*j+1) * 16^i * G
|
||||
|
||||
// now compute res = sum_{i=0..63} a[i] * 16^i * G step by step.
|
||||
// initial res = |a[0]| * G. Note that a[0] = a & 0xf if (a&0x10) != 0
|
||||
// and - (16 - (a & 0xf)) otherwise. We can compute this as
|
||||
// ((a ^ (((a >> 4) & 1) - 1)) & 0xf) >> 1
|
||||
// since a is odd.
|
||||
lowbits = a.val[0] & ((1 << 5) - 1);
|
||||
lowbits ^= (lowbits >> 4) - 1;
|
||||
lowbits &= 15;
|
||||
curve_to_jacobian(&secp256k1_cp[0][lowbits >> 1], &jres);
|
||||
for (i = 1; i < 64; i ++) {
|
||||
// invariant res = sign(a[i-1]) sum_{j=0..i-1} (a[j] * 16^j * G)
|
||||
|
||||
// shift a by 4 places.
|
||||
for (j = 0; j < 8; j++) {
|
||||
a.val[j] = (a.val[j] >> 4) | ((a.val[j + 1] & 0xf) << 26);
|
||||
}
|
||||
a.val[j] >>= 4;
|
||||
// a = old(a)>>(4*i)
|
||||
// a is even iff sign(a[i-1]) = -1
|
||||
|
||||
lowbits = a.val[0] & ((1 << 5) - 1);
|
||||
lowbits ^= (lowbits >> 4) - 1;
|
||||
lowbits &= 15;
|
||||
// negate last result to make signs of this round and the
|
||||
// last round equal.
|
||||
conditional_negate((lowbits & 1) - 1, &jres.y, &prime256k1);
|
||||
|
||||
// add odd factor
|
||||
point_jacobian_add(&secp256k1_cp[i][lowbits >> 1], &jres);
|
||||
}
|
||||
conditional_negate(((a.val[0] >> 4) & 1) - 1, &jres.y, &prime256k1);
|
||||
jacobian_to_curve(&jres, res);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// res = k * G
// Variant used when USE_PRECOMPUTED_CP is off: hands the work to the
// generic point_multiply with the base point G256k1.
void scalar_multiply(const bignum256 *k, curve_point *res)
{
	const curve_point *base = &G256k1;

	point_multiply(k, base, res);
}
|
||||
|
||||
#endif
|
||||
|
||||
// generate random K for signing
|
||||
int generate_k_random(bignum256 *k) {
|
||||
int i, j;
|
||||
@ -531,7 +885,6 @@ int ecdsa_verify_double(const uint8_t *pub_key, const uint8_t *sig, const uint8_
|
||||
// returns 0 if verification succeeded
|
||||
int ecdsa_verify_digest(const uint8_t *pub_key, const uint8_t *sig, const uint8_t *digest)
|
||||
{
|
||||
int i, j;
|
||||
curve_point pub, res;
|
||||
bignum256 r, s, z;
|
||||
|
||||
@ -562,16 +915,8 @@ int ecdsa_verify_digest(const uint8_t *pub_key, const uint8_t *sig, const uint8_
|
||||
}
|
||||
|
||||
// both pub and res can be infinity, can have y = 0 OR can be equal -> false negative
|
||||
for (i = 0; i < 9; i++) {
|
||||
for (j = 0; j < 30; j++) {
|
||||
if (i == 8 && (s.val[i] >> j) == 0) break;
|
||||
if (s.val[i] & (1u << j)) {
|
||||
point_add(&pub, &res);
|
||||
}
|
||||
point_double(&pub);
|
||||
}
|
||||
}
|
||||
|
||||
point_multiply(&s, &pub, &pub);
|
||||
point_add(&pub, &res);
|
||||
bn_mod(&(res.x), &order256k1);
|
||||
|
||||
// signature does not match
|
||||
|
@ -23,11 +23,6 @@
|
||||
#ifndef __OPTIONS_H__
|
||||
#define __OPTIONS_H__
|
||||
|
||||
// use precomputed Inverse Values of powers of two
|
||||
#ifndef USE_PRECOMPUTED_IV
|
||||
#define USE_PRECOMPUTED_IV 1
|
||||
#endif
|
||||
|
||||
// use precomputed Curve Points (some scalar multiples of curve base point G)
|
||||
#ifndef USE_PRECOMPUTED_CP
|
||||
#define USE_PRECOMPUTED_CP 1
|
||||
|
2952
secp256k1.c
2952
secp256k1.c
File diff suppressed because it is too large
Load Diff
@ -48,13 +48,8 @@ extern const bignum256 order256k1_half;
|
||||
// 3/2 in G_p
|
||||
extern const bignum256 three_over_two256k1;
|
||||
|
||||
#if USE_PRECOMPUTED_IV
|
||||
extern const bignum256 secp256k1_iv[256];
|
||||
#endif
|
||||
|
||||
#if USE_PRECOMPUTED_CP
|
||||
extern const curve_point secp256k1_cp[256];
|
||||
extern const curve_point secp256k1_cp2[255];
|
||||
extern const curve_point secp256k1_cp[64][8];
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
2
tests.c
2
tests.c
@ -34,6 +34,7 @@
|
||||
#include "bip39.h"
|
||||
#include "ecdsa.h"
|
||||
#include "pbkdf2.h"
|
||||
#include "rand.h"
|
||||
#include "sha2.h"
|
||||
#include "options.h"
|
||||
|
||||
@ -1271,6 +1272,7 @@ Suite *test_suite(void)
|
||||
int main(void)
|
||||
{
|
||||
int number_failed;
|
||||
init_rand(); // needed for scalar_multiply()
|
||||
Suite *s = test_suite();
|
||||
SRunner *sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_VERBOSE);
|
||||
|
Loading…
Reference in New Issue
Block a user