mirror of
https://github.com/trezor/trezor-firmware.git
synced 2025-01-10 15:30:55 +00:00
1832 lines
60 KiB
C
1832 lines
60 KiB
C
/**
|
|
* Copyright (c) 2013-2014 Tomas Dzetkulic
|
|
* Copyright (c) 2013-2014 Pavol Rusnak
|
|
* Copyright (c) 2015 Jochen Hoenicke
|
|
* Copyright (c) 2016 Alex Beregszaszi
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
|
|
* OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "bignum.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "memzero.h"
|
|
#include "script.h"
|
|
|
|
/*
 This library implements 256-bit number arithmetic.

 An unsigned 256-bit number is represented by a bignum256 structure, that is an
 array of nine 32-bit values called limbs. Limbs are digits of the number in
 the base 2**29 representation in the little endian order. This means that
   bignum256 x;
 represents the value
   sum([x[i] * 2**(29*i) for i in range(9)]).

 A limb of a bignum256 is *normalized* iff it's less than 2**29.
 A bignum256 is *normalized* iff each of its limbs is normalized.
 A number is *fully reduced modulo p* iff it is less than p.
 A number is *partly reduced modulo p* iff it is less than 2*p.
 The number p is usually a prime number such that 2^256 - 2^224 <= p <= 2^256.

 All functions except bn_fast_mod expect that all their bignum256 inputs are
 normalized. (The function bn_fast_mod allows the input number to have the
 most significant limb unnormalized). All bignum256 outputs of all functions
 are guaranteed to be normalized.

 A number can be partly reduced with bn_fast_mod, a partly reduced number can
 be fully reduced with bn_mod.

 A function has *constant control flow with regard to its argument* iff the
 order in which instructions of the function are executed doesn't depend on the
 value of the argument.
 A function has *constant memory access flow with regard to its argument* iff
 the memory addresses that are accessed and the order in which they are
 accessed don't depend on the value of the argument.
 A function *has constant control (memory access) flow* iff it has constant
 control (memory access) flow with regard to all its arguments.

 The following function has constant control flow with regard to its argument
 n, however it doesn't have constant memory access flow with regard to it:
 void f(int n, int *a) {
   a[0] = 0;
   a[n] = 0; // memory address reveals the value of n
 }

 Unless stated otherwise all functions are supposed to have both constant
 control flow and constant memory access flow.
 */
|
|
|
|
// Upper bound on the number of decimal digits of a bignum256 value:
// floor(log(2**(LIMBS * BITS_PER_LIMB), 10)) + 1
#define BN_MAX_DECIMAL_DIGITS 79
|
|
|
|
// out_number = (bignum256) in_number
|
|
// Assumes in_number is a raw bigendian 256-bit number
|
|
// Guarantees out_number is normalized
|
|
void bn_read_be(const uint8_t *in_number, bignum256 *out_number) {
|
|
uint32_t temp = 0;
|
|
|
|
for (int i = 0; i < BN_LIMBS - 1; i++) {
|
|
uint32_t limb = read_be(in_number + (BN_LIMBS - 2 - i) * 4);
|
|
|
|
temp |= limb << (BN_EXTRA_BITS * i);
|
|
out_number->val[i] = temp & BN_LIMB_MASK;
|
|
|
|
temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
|
|
}
|
|
|
|
out_number->val[BN_LIMBS - 1] = temp;
|
|
}
|
|
|
|
// out_number = (256BE) in_number
|
|
// Assumes in_number < 2**256
|
|
// Guarantess out_number is a raw bigendian 256-bit number
|
|
void bn_write_be(const bignum256 *in_number, uint8_t *out_number) {
|
|
uint32_t temp = in_number->val[BN_LIMBS - 1];
|
|
for (int i = BN_LIMBS - 2; i >= 0; i--) {
|
|
uint32_t limb = in_number->val[i];
|
|
|
|
temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
|
|
(limb >> (BN_EXTRA_BITS * i));
|
|
write_be(out_number + (BN_LIMBS - 2 - i) * 4, temp);
|
|
|
|
temp = limb;
|
|
}
|
|
}
|
|
|
|
// out_number = (bignum256) in_number
|
|
// Assumes in_number is a raw little endian 256-bit number
|
|
// Guarantees out_number is normalized
|
|
void bn_read_le(const uint8_t *in_number, bignum256 *out_number) {
|
|
uint32_t temp = 0;
|
|
for (int i = 0; i < BN_LIMBS - 1; i++) {
|
|
uint32_t limb = read_le(in_number + i * 4);
|
|
|
|
temp |= limb << (BN_EXTRA_BITS * i);
|
|
out_number->val[i] = temp & BN_LIMB_MASK;
|
|
temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
|
|
}
|
|
|
|
out_number->val[BN_LIMBS - 1] = temp;
|
|
}
|
|
|
|
// out_number = (256LE) in_number
|
|
// Assumes in_number < 2**256
|
|
// Guarantess out_number is a raw little endian 256-bit number
|
|
void bn_write_le(const bignum256 *in_number, uint8_t *out_number) {
|
|
uint32_t temp = in_number->val[BN_LIMBS - 1];
|
|
|
|
for (int i = BN_LIMBS - 2; i >= 0; i--) {
|
|
uint32_t limb = in_number->val[i];
|
|
temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
|
|
(limb >> (BN_EXTRA_BITS * i));
|
|
write_le(out_number + i * 4, temp);
|
|
temp = limb;
|
|
}
|
|
}
|
|
|
|
// out_number = (bignum256) in_number
|
|
// Guarantees out_number is normalized
|
|
void bn_read_uint32(uint32_t in_number, bignum256 *out_number) {
|
|
out_number->val[0] = in_number & BN_LIMB_MASK;
|
|
out_number->val[1] = in_number >> BN_BITS_PER_LIMB;
|
|
for (uint32_t i = 2; i < BN_LIMBS; i++) out_number->val[i] = 0;
|
|
}
|
|
|
|
// out_number = (bignum256) in_number
|
|
// Guarantees out_number is normalized
|
|
void bn_read_uint64(uint64_t in_number, bignum256 *out_number) {
|
|
out_number->val[0] = in_number & BN_LIMB_MASK;
|
|
out_number->val[1] = (in_number >>= BN_BITS_PER_LIMB) & BN_LIMB_MASK;
|
|
out_number->val[2] = in_number >> BN_BITS_PER_LIMB;
|
|
for (uint32_t i = 3; i < BN_LIMBS; i++) out_number->val[i] = 0;
|
|
}
|
|
|
|
// Returns the bitsize of x
|
|
// Assumes x is normalized
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow
|
|
int bn_bitcount(const bignum256 *x) {
|
|
for (int i = BN_LIMBS - 1; i >= 0; i--) {
|
|
uint32_t limb = x->val[i];
|
|
if (limb != 0) {
|
|
// __builtin_clz returns the number of leading zero bits starting at the
|
|
// most significant bit position
|
|
return i * BN_BITS_PER_LIMB + (32 - __builtin_clz(limb));
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Returns the number of decimal digits of x; if x is 0, returns 1
|
|
// Assumes x is normalized
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow
|
|
unsigned int bn_digitcount(const bignum256 *x) {
|
|
bignum256 val = {0};
|
|
bn_copy(x, &val);
|
|
|
|
unsigned int digits = 1;
|
|
for (unsigned int i = 0; i < BN_MAX_DECIMAL_DIGITS; i += 3) {
|
|
uint32_t limb = 0;
|
|
|
|
bn_divmod1000(&val, &limb);
|
|
|
|
if (limb >= 100) {
|
|
digits = i + 3;
|
|
} else if (limb >= 10) {
|
|
digits = i + 2;
|
|
} else if (limb >= 1) {
|
|
digits = i + 1;
|
|
}
|
|
}
|
|
|
|
memzero(&val, sizeof(val));
|
|
|
|
return digits;
|
|
}
|
|
|
|
// x = 0
|
|
// Guarantees x is normalized
|
|
void bn_zero(bignum256 *x) {
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
x->val[i] = 0;
|
|
}
|
|
}
|
|
|
|
// x = 1
|
|
// Guarantees x is normalized
|
|
void bn_one(bignum256 *x) {
|
|
x->val[0] = 1;
|
|
for (int i = 1; i < BN_LIMBS; i++) {
|
|
x->val[i] = 0;
|
|
}
|
|
}
|
|
|
|
// Returns x == 0
|
|
// Assumes x is normalized
|
|
int bn_is_zero(const bignum256 *x) {
|
|
uint32_t result = 0;
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
result |= x->val[i];
|
|
}
|
|
return !result;
|
|
}
|
|
|
|
// Returns x == 1
|
|
// Assumes x is normalized
|
|
int bn_is_one(const bignum256 *x) {
|
|
uint32_t result = x->val[0] ^ 1;
|
|
for (int i = 1; i < BN_LIMBS; i++) {
|
|
result |= x->val[i];
|
|
}
|
|
return !result;
|
|
}
|
|
|
|
// Returns x < y
|
|
// Assumes x, y are normalized
|
|
int bn_is_less(const bignum256 *x, const bignum256 *y) {
|
|
uint32_t res1 = 0;
|
|
uint32_t res2 = 0;
|
|
for (int i = BN_LIMBS - 1; i >= 0; i--) {
|
|
res1 = (res1 << 1) | (x->val[i] < y->val[i]);
|
|
res2 = (res2 << 1) | (x->val[i] > y->val[i]);
|
|
}
|
|
return res1 > res2;
|
|
}
|
|
|
|
// Returns x == y
|
|
// Assumes x, y are normalized
|
|
int bn_is_equal(const bignum256 *x, const bignum256 *y) {
|
|
uint32_t result = 0;
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
result |= x->val[i] ^ y->val[i];
|
|
}
|
|
return !result;
|
|
}
|
|
|
|
// res = cond if truecase else falsecase
|
|
// Assumes cond is either 0 or 1
|
|
// Works properly even if &res == &truecase or &res == &falsecase or
|
|
// &truecase == &falsecase or &res == &truecase == &falsecase
|
|
void bn_cmov(bignum256 *res, volatile uint32_t cond, const bignum256 *truecase,
|
|
const bignum256 *falsecase) {
|
|
assert((cond == 1) | (cond == 0));
|
|
|
|
uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
|
|
uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
res->val[i] = (truecase->val[i] & tmask) | (falsecase->val[i] & fmask);
|
|
}
|
|
}
|
|
|
|
// x = -x % prime if cond else x,
|
|
// Explicitly x = (3 * prime - x if x > prime else 2 * prime - x) if cond else
|
|
// else (x if x > prime else x + prime)
|
|
// Assumes x is normalized and partly reduced
|
|
// Assumes cond is either 1 or 0
|
|
// Guarantees x is normalized
|
|
// Assumes prime is normalized and
|
|
// 0 < prime < 2**260 == 2**(BITS_PER_LIMB * LIMBS - 1)
|
|
void bn_cnegate(volatile uint32_t cond, bignum256 *x, const bignum256 *prime) {
|
|
assert((cond == 1) | (cond == 0));
|
|
|
|
uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
|
|
uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
|
|
|
|
bn_mod(x, prime);
|
|
// x < prime
|
|
|
|
uint32_t acc1 = 1;
|
|
uint32_t acc2 = 0;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc1 += (BN_BASE - 1) + 2 * prime->val[i] - x->val[i];
|
|
// acc1 neither overflows 32 bits nor underflows 0
|
|
// Proof:
|
|
// acc1 + (BASE - 1) + 2 * prime[i] - x[i]
|
|
// >= (BASE - 1) - x >= (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
|
|
// == 0
|
|
// acc1 + (BASE - 1) + 2 * prime[i] - x[i]
|
|
// <= acc1 + (BASE - 1) + 2 * prime[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) +
|
|
// (2**BITS_PER_LIMB - 1)
|
|
// == 7 + 3 * 2**29 < 2**32
|
|
|
|
acc2 += prime->val[i] + x->val[i];
|
|
// acc2 doesn't overflow 32 bits
|
|
// Proof:
|
|
// acc2 + prime[i] + x[i]
|
|
// <= 2**(32 - BITS_PER_LIMB) - 1 + 2 * (2**BITS_PER_LIMB - 1)
|
|
// == 2**(32 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB + 1) - 2
|
|
// == 2**30 + 5 < 2**32
|
|
|
|
// x = acc1 & LIMB_MASK if cond else acc2 & LIMB_MASK
|
|
x->val[i] = ((acc1 & tmask) | (acc2 & fmask)) & BN_LIMB_MASK;
|
|
|
|
acc1 >>= BN_BITS_PER_LIMB;
|
|
// acc1 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
// acc1 == 2**(BITS_PER_LIMB * (i + 1)) + 2 * prime[:i + 1] - x[:i + 1]
|
|
// >> BITS_PER_LIMB * (i + 1)
|
|
|
|
acc2 >>= BN_BITS_PER_LIMB;
|
|
// acc2 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
// acc2 == prime[:i + 1] + x[:i + 1] >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc1 == 1); // assert prime <= 2**260
|
|
// assert(acc2 == 0);
|
|
|
|
// clang-format off
|
|
// acc1 == 1
|
|
// Proof:
|
|
// acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) + 2 * prime >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) + 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 2 >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
|
|
// acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
|
|
// >= 2**(BITS_PER_LIMB * LIMBS) + 0 >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
|
|
// acc2 == 0
|
|
// Proof:
|
|
// acc2 == prime[:LIMBS] + x[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == prime + x >> BITS_PER_LIMB * LIMBS
|
|
// <= 2 * prime - 1 >> BITS_PER_LIMB * LIMBS
|
|
// <= 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) - 1 >> 261
|
|
// == 2**(BITS_PER_LIMB * LIMBS) - 3 >> BITS_PER_LIMB * LIMBS
|
|
// == 0
|
|
// clang-format on
|
|
}
|
|
|
|
// x <<= 1
|
|
// Assumes x is normalized, x < 2**260 == 2**(LIMBS*BITS_PER_LIMB - 1)
|
|
// Guarantees x is normalized
|
|
void bn_lshift(bignum256 *x) {
|
|
for (int i = BN_LIMBS - 1; i > 0; i--) {
|
|
x->val[i] = ((x->val[i] << 1) & BN_LIMB_MASK) |
|
|
(x->val[i - 1] >> (BN_BITS_PER_LIMB - 1));
|
|
}
|
|
x->val[0] = (x->val[0] << 1) & BN_LIMB_MASK;
|
|
}
|
|
|
|
// x >>= 1, i.e. x = floor(x/2)
|
|
// Assumes x is normalized
|
|
// Guarantees x is normalized
|
|
// If x is partly reduced (fully reduced) modulo prime,
|
|
// guarantess x will be partly reduced (fully reduced) modulo prime
|
|
void bn_rshift(bignum256 *x) {
|
|
for (int i = 0; i < BN_LIMBS - 1; i++) {
|
|
x->val[i] =
|
|
(x->val[i] >> 1) | ((x->val[i + 1] & 1) << (BN_BITS_PER_LIMB - 1));
|
|
}
|
|
x->val[BN_LIMBS - 1] >>= 1;
|
|
}
|
|
|
|
// Sets i-th least significant bit (counting from zero)
|
|
// Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
|
|
// Guarantees x is normalized
|
|
// The function has constant control flow but not constant memory access flow
|
|
// with regard to i
|
|
void bn_setbit(bignum256 *x, uint16_t i) {
|
|
assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
|
|
x->val[i / BN_BITS_PER_LIMB] |= (1u << (i % BN_BITS_PER_LIMB));
|
|
}
|
|
|
|
// clears i-th least significant bit (counting from zero)
|
|
// Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
|
|
// Guarantees x is normalized
|
|
// The function has constant control flow but not constant memory access flow
|
|
// with regard to i
|
|
void bn_clearbit(bignum256 *x, uint16_t i) {
|
|
assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
|
|
x->val[i / BN_BITS_PER_LIMB] &= ~(1u << (i % BN_BITS_PER_LIMB));
|
|
}
|
|
|
|
// returns i-th least significant bit (counting from zero)
|
|
// Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
|
|
// The function has constant control flow but not constant memory access flow
|
|
// with regard to i
|
|
uint32_t bn_testbit(const bignum256 *x, uint16_t i) {
|
|
assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
|
|
return (x->val[i / BN_BITS_PER_LIMB] >> (i % BN_BITS_PER_LIMB)) & 1;
|
|
}
|
|
|
|
// res = x ^ y
|
|
// Assumes x, y are normalized
|
|
// Guarantees res is normalized
|
|
// Works properly even if &res == &x or &res == &y or &res == &x == &y
|
|
void bn_xor(bignum256 *res, const bignum256 *x, const bignum256 *y) {
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
res->val[i] = x->val[i] ^ y->val[i];
|
|
}
|
|
}
|
|
|
|
// x = x / 2 % prime
|
|
// Explicitly x = x / 2 if is_even(x) else (x + prime) / 2
|
|
// Assumes x is normalized, x + prime < 261 == LIMBS * BITS_PER_LIMB
|
|
// Guarantees x is normalized
|
|
// If x is partly reduced (fully reduced) modulo prime,
|
|
// guarantess x will be partly reduced (fully reduced) modulo prime
|
|
// Assumes prime is an odd number and normalized
|
|
void bn_mult_half(bignum256 *x, const bignum256 *prime) {
|
|
// x = x / 2 if is_even(x) else (x + prime) / 2
|
|
|
|
uint32_t x_is_odd_mask =
|
|
-(x->val[0] & 1); // x_is_odd_mask = 0xFFFFFFFF if is_odd(x) else 0
|
|
|
|
uint32_t acc = (x->val[0] + (prime->val[0] & x_is_odd_mask)) >> 1;
|
|
// acc < 2**BITS_PER_LIMB
|
|
// Proof:
|
|
// acc == x[0] + prime[0] & x_is_odd_mask >> 1
|
|
// <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) >> 1
|
|
// == 2**(BITS_PER_LIMB + 1) - 2 >> 1
|
|
// < 2**(BITS_PER_LIMB)
|
|
|
|
for (int i = 0; i < BN_LIMBS - 1; i++) {
|
|
uint32_t temp = (x->val[i + 1] + (prime->val[i + 1] & x_is_odd_mask));
|
|
// temp < 2**(BITS_PER_LIMB + 1)
|
|
// Proof:
|
|
// temp == x[i + 1] + val[i + 1] & x_is_odd_mask
|
|
// <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1)
|
|
// < 2**(BITS_PER_LIMB + 1)
|
|
|
|
acc += (temp & 1) << (BN_BITS_PER_LIMB - 1);
|
|
// acc doesn't overflow 32 bits
|
|
// Proof:
|
|
// acc + (temp & 1 << BITS_PER_LIMB - 1)
|
|
// <= 2**(BITS_PER_LIMB + 1) + 2**(BITS_PER_LIMB - 1)
|
|
// <= 2**30 + 2**28 < 2**32
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
acc += temp >> 1;
|
|
// acc < 2**(BITS_PER_LIMB + 1)
|
|
// Proof:
|
|
// acc + (temp >> 1)
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB + 1) - 1 >> 1)
|
|
// == 7 + 2**(BITS_PER_LIMB) - 1 < 2**(BITS_PER_LIMB + 1)
|
|
|
|
// acc == x[:i+2]+(prime[:i+2] & x_is_odd_mask) >> BITS_PER_LIMB * (i+1)
|
|
}
|
|
x->val[BN_LIMBS - 1] = acc;
|
|
|
|
// assert(acc >> BITS_PER_LIMB == 0);
|
|
// acc >> BITS_PER_LIMB == 0
|
|
// Proof:
|
|
// acc
|
|
// == x[:LIMBS] + (prime[:LIMBS] & x_is_odd_mask) >> BITS_PER_LIMB*LIMBS
|
|
// == x + (prime & x_is_odd_mask) >> BITS_PER_LIMB * LIMBS
|
|
// <= x + prime >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
|
|
// == 0
|
|
}
|
|
|
|
// x = x * k % prime
|
|
// Assumes x is normalized, 0 <= k <= 8 = 2**(32 - BITS_PER_LIMB)
|
|
// Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
|
|
// Guarantees x is normalized and partly reduced modulo prime
|
|
void bn_mult_k(bignum256 *x, uint8_t k, const bignum256 *prime) {
|
|
assert(k <= 8);
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
x->val[i] = k * x->val[i];
|
|
// x[i] doesn't overflow 32 bits
|
|
// k * x[i] <= 2**(32 - BITS_PER_LIMB) * (2**BITS_PER_LIMB - 1)
|
|
// < 2**(32 - BITS_PER_LIMB) * 2**BITS_PER_LIMB == 2**32
|
|
}
|
|
|
|
bn_fast_mod(x, prime);
|
|
}
|
|
|
|
// Reduces partly reduced x modulo prime
|
|
// Explicitly x = x if x < prime else x - prime
|
|
// Assumes x is partly reduced modulo prime
|
|
// Guarantees x is fully reduced modulo prime
|
|
// Assumes prime is nonzero and normalized
|
|
void bn_mod(bignum256 *x, const bignum256 *prime) {
|
|
uint32_t x_less_prime = bn_is_less(x, prime);
|
|
|
|
bignum256 temp = {0};
|
|
bn_subtract(x, prime, &temp);
|
|
bn_cmov(x, x_less_prime, x, &temp);
|
|
|
|
memzero(&temp, sizeof(temp));
|
|
}
|
|
|
|
// Auxiliary function for bn_multiply
|
|
// res = k * x
|
|
// Assumes k and x are normalized
|
|
// Guarantees res is normalized 18 digit little endian number in base 2**29
|
|
void bn_multiply_long(const bignum256 *k, const bignum256 *x,
|
|
uint32_t res[2 * BN_LIMBS]) {
|
|
// Uses long multiplication in base 2**29, see
|
|
// https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
|
|
|
|
uint64_t acc = 0;
|
|
|
|
// compute lower half
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
for (int j = 0; j <= i; j++) {
|
|
acc += k->val[j] * (uint64_t)x->val[i - j];
|
|
// acc doesn't overflow 64 bits
|
|
// Proof:
|
|
// acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
|
|
// <= (2**(64 - BITS_PER_LIMB) - 1) +
|
|
// LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
|
|
// == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
|
|
// <= 2**35 + 9 * 2**58 < 2**64
|
|
}
|
|
|
|
res[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 2**35 - 1 == 2**(64 - BITS_PER_LIMB) - 1
|
|
}
|
|
|
|
// compute upper half
|
|
for (int i = BN_LIMBS; i < 2 * BN_LIMBS - 1; i++) {
|
|
for (int j = i - BN_LIMBS + 1; j < BN_LIMBS; j++) {
|
|
acc += k->val[j] * (uint64_t)x->val[i - j];
|
|
// acc doesn't overflow 64 bits
|
|
// Proof:
|
|
// acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
|
|
// <= (2**(64 - BITS_PER_LIMB) - 1)
|
|
// LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
|
|
// == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
|
|
// <= 2**35 + 9 * 2**58 < 2**64
|
|
}
|
|
|
|
res[i] = acc & (BN_BASE - 1);
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc < 2**35 == 2**(64 - BITS_PER_LIMB)
|
|
}
|
|
|
|
res[2 * BN_LIMBS - 1] = acc;
|
|
}
|
|
|
|
// Auxiliary function for bn_multiply
|
|
// Assumes 0 <= d <= 8 == LIMBS - 1
|
|
// Assumes res is normalized and res < 2**(256 + 29*d + 31)
|
|
// Guarantess res in normalized and res < 2 * prime * 2**(29*d)
|
|
// Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
void bn_multiply_reduce_step(uint32_t res[2 * BN_LIMBS], const bignum256 *prime,
|
|
uint32_t d) {
|
|
// clang-format off
|
|
// Computes res = res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d)
|
|
|
|
// res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) < 2 * prime * 2**(BITS_PER_LIMB * d)
|
|
// Proof:
|
|
// res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * prime
|
|
// == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - (2**256 - prime))
|
|
// == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * 2**256 + res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
|
|
// == (res % 2**(256 + BITS_PER_LIMB * d)) + res // (2**256 + BITS_PER_LIMB * d) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
|
|
// <= (2**(256 + 29*d + 31) % 2**(256 + 29*d)) + (2**(256 + 29*d + 31) - 1) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
|
|
// <= 2**(256 + 29*d) + 2**(256 + 29*d + 31) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
|
|
// == 2**(256 + 29*d) + 2**31 * 2**(29*d) * (2**256 - prime)
|
|
// == 2**(29*d) * (2**256 + 2**31 * (2*256 - prime))
|
|
// <= 2**(29*d) * (2**256 + 2**31 * 2*224)
|
|
// <= 2**(29*d) * (2**256 + 2**255)
|
|
// <= 2**(29*d) * 2 * (2**256 - 2**224)
|
|
// <= 2 * prime * 2**(29*d)
|
|
// clang-format on
|
|
|
|
uint32_t coef =
|
|
(res[d + BN_LIMBS - 1] >> (256 - (BN_LIMBS - 1) * BN_BITS_PER_LIMB)) +
|
|
(res[d + BN_LIMBS] << ((BN_LIMBS * BN_BITS_PER_LIMB) - 256));
|
|
|
|
// coef == res // 2**(256 + BITS_PER_LIMB * d)
|
|
|
|
// coef < 2**31
|
|
// Proof:
|
|
// coef == res // 2**(256 + BITS_PER_LIMB * d)
|
|
// < 2**(256 + 29 * d + 31) // 2**(256 + 29 * d)
|
|
// == 2**31
|
|
|
|
const int shift = 31;
|
|
uint64_t acc = 1ull << shift;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += (((uint64_t)(BN_BASE - 1)) << shift) + res[d + i] -
|
|
prime->val[i] * (uint64_t)coef;
|
|
// acc neither overflow 64 bits nor underflow zero
|
|
// Proof:
|
|
// acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
|
|
// >= ((BASE - 1) << shift) - prime[i] * coef
|
|
// == 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) *
|
|
// (2**31 - 1)
|
|
// == (2**shift - 2**31 + 1) * (2**BITS_PER_LIMB - 1)
|
|
// == (2**31 - 2**31 + 1) * (2**29 - 1)
|
|
// == 2**29 - 1 > 0
|
|
// acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
|
|
// <= acc + ((BASE - 1) << shift) + res[d+i]
|
|
// <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
|
|
// + (2*BITS_PER_LIMB - 1)
|
|
// == (2**(64 - BITS_PER_LIMB) - 1) + (2**shift + 1) *
|
|
// (2**BITS_PER_LIMB - 1)
|
|
// == (2**35 - 1) + (2**31 + 1) * (2**29 - 1)
|
|
// <= 2**35 + 2**60 + 2**29 < 2**64
|
|
|
|
res[d + i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
|
|
|
|
// acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + res[d : d + i + 1]
|
|
// - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// acc += (((uint64_t)(BASE - 1)) << shift) + res[d + LIMBS];
|
|
// acc >>= BITS_PER_LIMB;
|
|
// assert(acc <= 1ul << shift);
|
|
|
|
// clang-format off
|
|
// acc == 1 << shift
|
|
// Proof:
|
|
// acc
|
|
// == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS] >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime >> BITS_PER_LIMB * (LIMBS + 1)
|
|
|
|
// == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[d : d + LIMBS + 1] - coef * prime) >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[:d] + BASE**d * res[d : d + LIMBS + 1] - BASE**d * coef * prime)//BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res - BASE**d * coef * prime) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (2 * prime * BASE**d) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// <= (1 << 321) + 2 * 2**256 >> 290
|
|
// == 1 << 31 == 1 << shift
|
|
|
|
// == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS + 1] >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// >= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + 0 >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == 1 << shift
|
|
// clang-format on
|
|
|
|
res[d + BN_LIMBS] = 0;
|
|
}
|
|
|
|
// Auxiliary function for bn_multiply
|
|
// Partly reduces res and stores both in x and res
|
|
// Assumes res in normalized and res < 2**519
|
|
// Guarantees x is normalized and partly reduced modulo prime
|
|
// Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
void bn_multiply_reduce(bignum256 *x, uint32_t res[2 * BN_LIMBS],
|
|
const bignum256 *prime) {
|
|
for (int i = BN_LIMBS - 1; i >= 0; i--) {
|
|
// res < 2**(256 + 29*i + 31)
|
|
// Proof:
|
|
// if i == LIMBS - 1:
|
|
// res < 2**519
|
|
// == 2**(256 + 29 * 8 + 31)
|
|
// == 2**(256 + 29 * (LIMBS - 1) + 31)
|
|
// else:
|
|
// res < 2 * prime * 2**(29 * (i + 1))
|
|
// <= 2**256 * 2**(29*i + 29) < 2**(256 + 29*i + 31)
|
|
bn_multiply_reduce_step(res, prime, i);
|
|
}
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
x->val[i] = res[i];
|
|
}
|
|
}
|
|
|
|
// x = k * x % prime
|
|
// Assumes k, x are normalized, k * x < 2**519
|
|
// Guarantees x is normalized and partly reduced modulo prime
|
|
// Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
void bn_multiply(const bignum256 *k, bignum256 *x, const bignum256 *prime) {
|
|
uint32_t res[2 * BN_LIMBS] = {0};
|
|
|
|
bn_multiply_long(k, x, res);
|
|
bn_multiply_reduce(x, res, prime);
|
|
|
|
memzero(res, sizeof(res));
|
|
}
|
|
|
|
// Partly reduces x modulo prime
|
|
// Assumes limbs of x except the last (the most significant) one are normalized
|
|
// Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
|
|
// Guarantees x is normalized and partly reduced modulo prime
|
|
void bn_fast_mod(bignum256 *x, const bignum256 *prime) {
|
|
// Computes x = x - (x // 2**256) * prime
|
|
|
|
// x < 2**((LIMBS - 1) * BITS_PER_LIMB + 32) == 2**264
|
|
|
|
// x - (x // 2**256) * prime < 2 * prime
|
|
// Proof:
|
|
// x - (x // 2**256) * prime
|
|
// == x - (x // 2**256) * (2**256 - (2**256 - prime))
|
|
// == x - ((x // 2**256) * 2**256) + (x // 2**256) * (2**256 - prime)
|
|
// == (x % prime) + (x // 2**256) * (2**256 - prime)
|
|
// <= prime - 1 + (2**264 // 2**256) * (2**256 - prime)
|
|
// <= 2**256 + 2**8 * 2**224 == 2**256 + 2**232
|
|
// < 2 * (2**256 - 2**224)
|
|
// <= 2 * prime
|
|
|
|
// x - (x // 2**256 - 1) * prime < 2 * prime
|
|
// Proof:
|
|
// x - (x // 2**256) * prime + prime
|
|
// == x - (x // 2**256) * (2**256 - (2**256 - prime)) + prime
|
|
// == x - ((x//2**256) * 2**256) + (x//2**256) * (2**256 - prime) + prime
|
|
// == (x % prime) + (x // 2**256) * (2**256 - prime) + prime
|
|
// <= 2 * prime - 1 + (2**264 // 2**256) * (2**256 - prime)
|
|
// <= 2 * prime + 2**8 * 2**224 == 2**256 + 2**232 + 2**256 - 2**224
|
|
// < 2 * (2**256 - 2**224)
|
|
// <= 2 * prime
|
|
|
|
uint32_t coef =
|
|
x->val[BN_LIMBS - 1] >> (256 - ((BN_LIMBS - 1) * BN_BITS_PER_LIMB));
|
|
|
|
// clang-format off
|
|
// coef == x // 2**256
|
|
// 0 <= coef < 2**((LIMBS - 1) * BITS_PER_LIMB + 32 - 256) == 256
|
|
// Proof:
|
|
//* Let x[[a : b] be the number consisting of a-th to (b-1)-th bit of the number x.
|
|
// x[LIMBS - 1] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
|
|
// == x[[(LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
|
|
// == x[[256 - ((LIMBS - 1) * BITS_PER_LIMB) + (LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]]
|
|
// == x[[256 : (LIMBS - 1) * BITS_PER_LIMB + 32]]
|
|
// == x[[256 : 264]] == x // 2**256
|
|
// clang-format on
|
|
|
|
const int shift = 8;
|
|
uint64_t acc = 1ull << shift;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += (((uint64_t)(BN_BASE - 1)) << shift) + x->val[i] -
|
|
prime->val[i] * (uint64_t)coef;
|
|
// acc neither overflows 64 bits nor underflows 0
|
|
// Proof:
|
|
// acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
|
|
// >= (BASE - 1 << shift) - prime[i] * coef
|
|
// >= 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * 255
|
|
// == (2**shift - 255) * (2**BITS_PER_LIMB - 1)
|
|
// == (2**8 - 255) * (2**29 - 1) == 2**29 - 1 >= 0
|
|
// acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
|
|
// <= acc + ((BASE - 1) << shift) + x[i]
|
|
// <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
|
|
// + (2**32 - 1)
|
|
// == (2**35 - 1) + 2**8 * (2**29 - 1) + 2**32
|
|
// < 2**35 + 2**37 + 2**32 < 2**64
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
|
|
|
|
// acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + x[:i + 1]
|
|
// - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc == 1 << shift);
|
|
|
|
// clang-format off
|
|
// acc == 1 << shift
|
|
// Proof:
|
|
// acc
|
|
// == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == (1 << BITS_PER_LIMB * LIMBS + shift) + (x - coef * prime) >> BITS_PER_LIMB * LIMBS
|
|
// <= (1 << BITS_PER_LIMB * LIMBS + shift) + (2 * prime) >> BITS_PER_LIMB * LIMBS
|
|
// <= (1 << BITS_PER_LIMB * LIMBS + shift) + 2 * 2**256 >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**269 + 2**257 >> 2**261
|
|
// <= 1 << 8 == 1 << shift
|
|
|
|
// acc
|
|
// == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// >= (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
|
|
// == (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
|
|
// <= 1 << 8 == 1 << shift
|
|
// clang-format on
|
|
}
|
|
|
|
// res = x**e % prime
|
|
// Assumes both x and e are normalized, x < 2**259
|
|
// Guarantees res is normalized and partly reduced modulo prime
|
|
// Works properly even if &x == &res
|
|
// Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow with regard to e
|
|
void bn_power_mod(const bignum256 *x, const bignum256 *e,
|
|
const bignum256 *prime, bignum256 *res) {
|
|
// Uses iterative right-to-left exponentiation by squaring, see
|
|
// https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method
|
|
|
|
bignum256 acc = {0};
|
|
bn_copy(x, &acc);
|
|
|
|
bn_one(res);
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
uint32_t limb = e->val[i];
|
|
|
|
for (int j = 0; j < BN_BITS_PER_LIMB; j++) {
|
|
// Break if the following bits of the last limb are zero
|
|
if (i == BN_LIMBS - 1 && limb == 0) break;
|
|
|
|
if (limb & 1)
|
|
// acc * res < 2**519
|
|
// Proof:
|
|
// acc * res <= max(2**259 - 1, 2 * prime) * (2 * prime)
|
|
// == max(2**259 - 1, 2**257) * 2**257 < 2**259 * 2**257
|
|
// == 2**516 < 2**519
|
|
bn_multiply(&acc, res, prime);
|
|
|
|
limb >>= 1;
|
|
// acc * acc < 2**519
|
|
// Proof:
|
|
// acc * acc <= max(2**259 - 1, 2 * prime)**2
|
|
// <= (2**259)**2 == 2**518 < 2**519
|
|
bn_multiply(&acc, &acc, prime);
|
|
}
|
|
// acc == x**(e[:i + 1]) % prime
|
|
}
|
|
|
|
memzero(&acc, sizeof(acc));
|
|
}
|
|
|
|
// x = sqrt(x) % prime
|
|
// Explicitly x = x**((prime+1)/4) % prime
|
|
// The other root is -sqrt(x)
|
|
// Assumes x is normalized, x < 2**259 and quadratic residuum mod prime
|
|
// Assumes prime is a prime number, prime % 4 == 3, it is normalized and
|
|
// 2**256 - 2**224 <= prime <= 2**256
|
|
// Guarantees x is normalized and fully reduced modulo prime
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow with regard to prime
|
|
void bn_sqrt(bignum256 *x, const bignum256 *prime) {
|
|
// Uses the Lagrange formula for the primes of the special form, see
|
|
// http://en.wikipedia.org/wiki/Quadratic_residue#Prime_or_prime_power_modulus
|
|
// If prime % 4 == 3, then sqrt(x) % prime == x**((prime+1)//4) % prime
|
|
|
|
assert(prime->val[BN_LIMBS - 1] % 4 == 3);
|
|
|
|
// e = (prime + 1) // 4
|
|
bignum256 e = {0};
|
|
bn_copy(prime, &e);
|
|
bn_addi(&e, 1);
|
|
bn_rshift(&e);
|
|
bn_rshift(&e);
|
|
|
|
bn_power_mod(x, &e, prime, x);
|
|
bn_mod(x, prime);
|
|
|
|
memzero(&e, sizeof(e));
|
|
}
|
|
|
|
// Returns 1/a % 2**n
// Assumes a is odd, 1 <= n <= 32
// Guarantees the result is smaller than 2**n
// The function has neither constant control flow nor constant memory
// access flow with regard to n
uint32_t inverse_mod_power_two(uint32_t a, uint32_t n) {
  // Uses "Explicit Quadratic Modular inverse modulo 2" from section 3.3 of "On
  // Newton-Raphson iteration for multiplicative inverses modulo prime powers"
  // by Jean-Guillaume Dumas, see
  // https://arxiv.org/pdf/1209.6626.pdf

  // With f = a - 1 (an even number):
  //   1/a == 1/(1 + f) == (1 - f) * (1 + f**2) * (1 + f**4) * ...  (mod 2**n)
  // The product is truncated as soon as the next factor is 1 modulo 2**n;
  // one factor is consumed per doubling of i below.

  // mask = (1 << n) - 1; n == 32 is special-cased because shifting a 32-bit
  // value by 32 is undefined
  uint32_t mask = n == 32 ? 0xFFFFFFFF : (1u << n) - 1;

  // The initial value is masked as well so that the result is reduced even
  // when the loop below does not execute (n == 1)
  uint32_t acc = (2 - a) & mask;
  uint32_t f = a - 1;

  for (uint32_t i = 1; i < n; i <<= 1) {
    f = (f * f) & mask;
    acc = (acc * (1 + f)) & mask;
  }

  return acc;
}
|
|
|
|
// x = (x / 2**BITS_PER_LIMB) % prime
|
|
// Assumes both x and prime are normalized
|
|
// Assumes prime is an odd number and normalized
|
|
// Guarantees x is normalized
|
|
// If x is partly reduced (fully reduced) modulo prime,
|
|
// guarantess x will be partly reduced (fully reduced) modulo prime
|
|
void bn_divide_base(bignum256 *x, const bignum256 *prime) {
|
|
// Uses an explicit formula for the modular inverse of power of two
|
|
// (x / 2**n) % prime == (x + ((-x / prime) % 2**n) * prime) // 2**n
|
|
// Proof:
|
|
// (x + ((-x / prime) % 2**n) * prime) % 2**n
|
|
// == (x - x / prime * prime) % 2**n
|
|
// == 0
|
|
// (x + ((-1 / prime) % 2**n) * prime) % prime
|
|
// == x
|
|
// if x < prime:
|
|
// (x + ((-x / prime) % 2**n) * prime) // 2**n
|
|
// <= ((prime - 1) + (2**n - 1) * prime) / 2**n
|
|
// == (2**n * prime - 1) / 2**n == prime - 1 / 2**n < prime
|
|
// if x < 2 * prime:
|
|
// (x + ((-x / prime) % 2**n) * prime) // 2**n
|
|
// <= ((2 * prime - 1) + (2**n - 1) * prime) / 2**n
|
|
// == (2**n * prime + prime - 1) / 2**n
|
|
// == prime + (prime - 1) / 2**n < 2 * prime
|
|
|
|
// m = (-x / prime) % 2**BITS_PER_LIMB
|
|
uint32_t m = (x->val[0] * (BN_BASE - inverse_mod_power_two(
|
|
prime->val[0], BN_BITS_PER_LIMB))) &
|
|
BN_LIMB_MASK;
|
|
// m < 2**BITS_PER_LIMB
|
|
|
|
uint64_t acc = x->val[0] + (uint64_t)m * prime->val[0];
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
|
|
for (int i = 1; i < BN_LIMBS; i++) {
|
|
acc = acc + x->val[i] + (uint64_t)m * prime->val[i];
|
|
// acc does not overflow 64 bits
|
|
// acc == acc + x + m * prime
|
|
// <= 2**(64 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB)
|
|
// 2**(BITS_PER_LIMB) * 2**(BITS_PER_LIMB)
|
|
// <= 2**(2 * BITS_PER_LIMB) + 2**(64 - BITS_PER_LIMB) +
|
|
// 2**(BITS_PER_LIMB)
|
|
// <= 2**58 + 2**35 + 2**29 < 2**64
|
|
|
|
x->val[i - 1] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc < 2**35 == 2**(64 - BITS_PER_LIMB)
|
|
|
|
// acc == x[:i + 1] + m * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
x->val[BN_LIMBS - 1] = acc;
|
|
|
|
assert(acc >> BN_BITS_PER_LIMB == 0);
|
|
|
|
// clang-format off
|
|
// acc >> BITS_PER_LIMB == 0
|
|
// Proof:
|
|
// acc >> BITS_PER_LIMB
|
|
// == (x[:LIMB] + m * prime[:LIMB] >> BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == x + m * prime >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// <= (2**(BITS_PER_LIMB * LIMBS) - 1) + (2**BITS_PER_LIMB - 1) * (2**(BITS_PER_LIMB * LIMBS) - 1) >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == 2**(BITS_PER_LIMB * LIMBS) - 1 + 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**(BITS_PER_LIMB * LIMBS) - 2**BITS_PER_LIMB + 1 >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**BITS_PER_LIMB >> BITS_PER_LIMB * (LIMBS + 1)
|
|
// == 0
|
|
// clang-format on
|
|
}
|
|
|
|
#if !USE_INVERSE_FAST
|
|
// x = 1/x % prime if x != 0 else 0
|
|
// Assumes x is normalized
|
|
// Assumes prime is a prime number
|
|
// Guarantees x is normalized and fully reduced modulo prime
|
|
// Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow with regard to prime
|
|
static void bn_inverse_slow(bignum256 *x, const bignum256 *prime) {
|
|
// Uses formula 1/x % prime == x**(prime - 2) % prime
|
|
// See https://en.wikipedia.org/wiki/Fermat%27s_little_theorem
|
|
|
|
bn_fast_mod(x, prime);
|
|
|
|
// e = prime - 2
|
|
bignum256 e = {0};
|
|
bn_read_uint32(2, &e);
|
|
bn_subtract(prime, &e, &e);
|
|
|
|
bn_power_mod(x, &e, prime, x);
|
|
bn_mod(x, prime);
|
|
|
|
memzero(&e, sizeof(e));
|
|
}
|
|
#endif
|
|
|
|
#if false
|
|
// x = 1/x % prime if x != 0 else 0
|
|
// Assumes x is is_normalized
|
|
// Assumes GCD(x, prime) = 1
|
|
// Guarantees x is normalized and fully reduced modulo prime
|
|
// Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow with regard to prime and x
|
|
static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
|
|
// "The Almost Montgomery Inverse" from the section 3 of "Constant Time
|
|
// Modular Inversion" by Joppe W. Bos
|
|
// See http://www.joppebos.com/files/CTInversion.pdf
|
|
|
|
/*
|
|
u = prime
|
|
v = x & prime
|
|
s = 1
|
|
r = 0
|
|
|
|
k = 0
|
|
while v != 1:
|
|
k += 1
|
|
if is_even(u):
|
|
u = u // 2
|
|
s = 2 * s
|
|
elif is_even(v):
|
|
v = v // 2
|
|
r = 2 * r
|
|
elif v < u:
|
|
u = (u - v) // 2
|
|
r = r + s
|
|
s = 2 * s
|
|
else:
|
|
v = (v - u) // 2
|
|
s = r + s
|
|
r = 2 * r
|
|
|
|
s = (s / 2**k) % prime
|
|
return s
|
|
*/
|
|
|
|
if (bn_is_zero(x)) return;
|
|
|
|
bn_fast_mod(x, prime);
|
|
bn_mod(x, prime);
|
|
|
|
bignum256 u = {0}, v = {0}, r = {0}, s = {0};
|
|
bn_copy(prime, &u);
|
|
bn_copy(x, &v);
|
|
bn_one(&s);
|
|
bn_zero(&r);
|
|
|
|
int k = 0;
|
|
while (!bn_is_one(&v)) {
|
|
if ((u.val[0] & 1) == 0) {
|
|
bn_rshift(&u);
|
|
bn_lshift(&s);
|
|
} else if ((v.val[0] & 1) == 0) {
|
|
bn_rshift(&v);
|
|
bn_lshift(&r);
|
|
} else if (bn_is_less(&v, &u)) {
|
|
bn_subtract(&u, &v, &u);
|
|
bn_rshift(&u);
|
|
bn_add(&r, &s);
|
|
bn_lshift(&s);
|
|
} else {
|
|
bn_subtract(&v, &u, &v);
|
|
bn_rshift(&v);
|
|
bn_add(&s, &r);
|
|
bn_lshift(&r);
|
|
}
|
|
k += 1;
|
|
assert(!bn_is_zero(&v)); // assert GCD(x, prime) == 1
|
|
}
|
|
|
|
// s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
|
|
for (int i = 0; i < k / BITS_PER_LIMB; i++) {
|
|
bn_divide_base(&s, prime);
|
|
}
|
|
|
|
// s = s / 2**(k % BITS_PER_LIMB)
|
|
for (int i = 0; i < k % BN_BITS_PER_LIMB; i++) {
|
|
bn_mult_half(&s, prime);
|
|
}
|
|
|
|
bn_copy(&s, x);
|
|
|
|
memzero(&u, sizeof(u));
|
|
memzero(&v, sizeof(v));
|
|
memzero(&r, sizeof(r));
|
|
memzero(&s, sizeof(s));
|
|
}
|
|
#endif
|
|
|
|
#if USE_INVERSE_FAST
|
|
// x = 1/x % prime if x != 0 else 0
|
|
// Assumes x is is_normalized
|
|
// Assumes GCD(x, prime) = 1
|
|
// Guarantees x is normalized and fully reduced modulo prime
|
|
// Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
// The function has constant control flow but not constant memory access flow
|
|
// with regard to prime and x
|
|
static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
|
|
// Custom constant time version of "The Almost Montgomery Inverse" from the
|
|
// section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
|
|
// See http://www.joppebos.com/files/CTInversion.pdf
|
|
|
|
/*
|
|
u = prime
|
|
v = x % prime
|
|
s = 1
|
|
r = 0
|
|
|
|
k = 0
|
|
while v != 1:
|
|
k += 1
|
|
if is_even(u): # b1
|
|
u = u // 2
|
|
s = 2 * s
|
|
elif is_even(v): # b2
|
|
v = v // 2
|
|
r = 2 * r
|
|
elif v < u: # b3
|
|
u = (u - v) // 2
|
|
r = r + s
|
|
s = 2 * s
|
|
else: # b4
|
|
v = (v - u) // 2
|
|
s = r + s
|
|
r = 2 * r
|
|
|
|
s = (s / 2**k) % prime
|
|
return s
|
|
*/
|
|
|
|
bn_fast_mod(x, prime);
|
|
bn_mod(x, prime);
|
|
|
|
bignum256 u = {0}, v = {0}, r = {0}, s = {0};
|
|
bn_copy(prime, &u);
|
|
bn_copy(x, &v);
|
|
bn_one(&s);
|
|
bn_zero(&r);
|
|
|
|
bignum256 zero = {0};
|
|
bn_zero(&zero);
|
|
|
|
int k = 0;
|
|
|
|
int finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
|
|
b3 = 0, b4 = 0;
|
|
finished = 0;
|
|
|
|
for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
|
|
finished = finished | -bn_is_one(&v);
|
|
u_even = -bn_is_even(&u);
|
|
v_even = -bn_is_even(&v);
|
|
v_less_u = -bn_is_less(&v, &u);
|
|
|
|
b1 = ~finished & u_even;
|
|
b2 = ~finished & ~b1 & v_even;
|
|
b3 = ~finished & ~b1 & ~b2 & v_less_u;
|
|
b4 = ~finished & ~b1 & ~b2 & ~b3;
|
|
|
|
// The ternary operator for pointers with constant control flow
|
|
// BN_INVERSE_FAST_TERNARY(c, t, f) = t if c else f
|
|
// Very nasty hack, sorry for that
|
|
#define BN_INVERSE_FAST_TERNARY(c, t, f) \
|
|
((void *)(((c) & (uintptr_t)(t)) | (~(c) & (uintptr_t)(f))))
|
|
|
|
bn_subtract(BN_INVERSE_FAST_TERNARY(b3, &u, &v),
|
|
BN_INVERSE_FAST_TERNARY(
|
|
b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &v, &u), &zero),
|
|
BN_INVERSE_FAST_TERNARY(b3, &u, &v));
|
|
|
|
bn_add(BN_INVERSE_FAST_TERNARY(b3, &r, &s),
|
|
BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &s, &r),
|
|
&zero));
|
|
bn_rshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &u, &v));
|
|
bn_lshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &s, &r));
|
|
|
|
k = k - ~finished;
|
|
}
|
|
|
|
// s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
|
|
for (int i = 0; i < 2 * BN_LIMBS; i++) {
|
|
// s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
|
|
bn_copy(&s, &r);
|
|
bn_divide_base(&r, prime);
|
|
bn_cmov(&s, i < k / BN_BITS_PER_LIMB, &r, &s);
|
|
}
|
|
|
|
// s = s / 2**(k % BITS_PER_LIMB)
|
|
for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
|
|
// s = s / 2 % prime if i < k % BITS_PER_LIMB else s
|
|
bn_copy(&s, &r);
|
|
bn_mult_half(&r, prime);
|
|
bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
|
|
}
|
|
|
|
bn_cmov(x, bn_is_zero(x), x, &s);
|
|
|
|
memzero(&u, sizeof(u));
|
|
memzero(&v, sizeof(v));
|
|
memzero(&r, sizeof(s));
|
|
memzero(&s, sizeof(s));
|
|
}
|
|
#endif
|
|
|
|
#if false
|
|
// x = 1/x % prime if x != 0 else 0
|
|
// Assumes x is is_normalized
|
|
// Assumes GCD(x, prime) = 1
|
|
// Guarantees x is normalized and fully reduced modulo prime
|
|
// Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
|
|
static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
|
|
// Custom constant time version of "The Almost Montgomery Inverse" from the
|
|
// section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
|
|
// See http://www.joppebos.com/files/CTInversion.pdf
|
|
|
|
/*
|
|
u = prime
|
|
v = x % prime
|
|
s = 1
|
|
r = 0
|
|
|
|
k = 0
|
|
while v != 1:
|
|
k += 1
|
|
if is_even(u): # b1
|
|
u = u // 2
|
|
s = 2 * s
|
|
elif is_even(v): # b2
|
|
v = v // 2
|
|
r = 2 * r
|
|
elif v < u: # b3
|
|
u = (u - v) // 2
|
|
r = r + s
|
|
s = 2 * s
|
|
else: # b4
|
|
v = (v - u) // 2
|
|
s = r + s
|
|
r = 2 * r
|
|
|
|
s = (s / 2**k) % prime
|
|
return s
|
|
*/
|
|
|
|
bn_fast_mod(x, prime);
|
|
bn_mod(x, prime);
|
|
|
|
bignum256 u = {0}, v = {0}, r = {0}, s = {0};
|
|
bn_copy(prime, &u);
|
|
bn_copy(x, &v);
|
|
bn_one(&s);
|
|
bn_zero(&r);
|
|
|
|
bignum256 zero = {0};
|
|
bn_zero(&zero);
|
|
|
|
int k = 0;
|
|
|
|
uint32_t finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
|
|
b3 = 0, b4 = 0;
|
|
finished = 0;
|
|
|
|
bignum256 u_half = {0}, v_half = {0}, u_minus_v_half = {0}, v_minus_u_half = {0}, r_plus_s = {0}, r_twice = {0}, s_twice = {0};
|
|
for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
|
|
finished = finished | bn_is_one(&v);
|
|
u_even = bn_is_even(&u);
|
|
v_even = bn_is_even(&v);
|
|
v_less_u = bn_is_less(&v, &u);
|
|
|
|
b1 = (finished ^ 1) & u_even;
|
|
b2 = (finished ^ 1) & (b1 ^ 1) & v_even;
|
|
b3 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & v_less_u;
|
|
b4 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & (b3 ^ 1);
|
|
|
|
// u_half = u // 2
|
|
bn_copy(&u, &u_half);
|
|
bn_rshift(&u_half);
|
|
|
|
// v_half = v // 2
|
|
bn_copy(&v, &v_half);
|
|
bn_rshift(&v_half);
|
|
|
|
// u_minus_v_half = (u - v) // 2
|
|
bn_subtract(&u, &v, &u_minus_v_half);
|
|
bn_rshift(&u_minus_v_half);
|
|
|
|
// v_minus_u_half = (v - u) // 2
|
|
bn_subtract(&v, &u, &v_minus_u_half);
|
|
bn_rshift(&v_minus_u_half);
|
|
|
|
// r_plus_s = r + s
|
|
bn_copy(&r, &r_plus_s);
|
|
bn_add(&r_plus_s, &s);
|
|
|
|
// r_twice = 2 * r
|
|
bn_copy(&r, &r_twice);
|
|
bn_lshift(&r_twice);
|
|
|
|
// s_twice = 2 * s
|
|
bn_copy(&s, &s_twice);
|
|
bn_lshift(&s_twice);
|
|
|
|
bn_cmov(&u, b1, &u_half, &u);
|
|
bn_cmov(&u, b3, &u_minus_v_half, &u);
|
|
|
|
bn_cmov(&v, b2, &v_half, &v);
|
|
bn_cmov(&v, b4, &v_minus_u_half, &v);
|
|
|
|
bn_cmov(&r, b2 | b4, &r_twice, &r);
|
|
bn_cmov(&r, b3, &r_plus_s, &r);
|
|
|
|
bn_cmov(&s, b1 | b3, &s_twice, &s);
|
|
bn_cmov(&s, b4, &r_plus_s, &s);
|
|
|
|
k = k + (finished ^ 1);
|
|
}
|
|
|
|
// s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
|
|
for (int i = 0; i < 2 * BN_LIMBS; i++) {
|
|
// s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
|
|
bn_copy(&s, &r);
|
|
bn_divide_base(&r, prime);
|
|
bn_cmov(&s, i < k / BITS_PER_LIMB, &r, &s);
|
|
}
|
|
|
|
// s = s / 2**(k % BITS_PER_LIMB)
|
|
for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
|
|
// s = s / 2 % prime if i < k % BITS_PER_LIMB else s
|
|
bn_copy(&s, &r);
|
|
bn_mult_half(&r, prime);
|
|
bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
|
|
}
|
|
|
|
bn_cmov(x, bn_is_zero(x), x, &s);
|
|
|
|
memzero(&u, sizeof(u));
|
|
memzero(&v, sizeof(v));
|
|
memzero(&r, sizeof(r));
|
|
memzero(&s, sizeof(s));
|
|
memzero(&u_half, sizeof(u_half));
|
|
memzero(&v_half, sizeof(v_half));
|
|
memzero(&u_minus_v_half, sizeof(u_minus_v_half));
|
|
memzero(&v_minus_u_half, sizeof(v_minus_u_half));
|
|
memzero(&r_twice, sizeof(r_twice));
|
|
memzero(&s_twice, sizeof(s_twice));
|
|
memzero(&r_plus_s, sizeof(r_plus_s));
|
|
}
|
|
#endif
|
|
|
|
// Normalizes x
|
|
// Assumes x < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
|
|
// Guarantees x is normalized
|
|
void bn_normalize(bignum256 *x) {
|
|
uint32_t acc = 0;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += x->val[i];
|
|
// acc doesn't overflow 32 bits
|
|
// Proof:
|
|
// acc + x[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
|
|
// == 7 + 2**29 - 1 < 2**32
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= (BN_BITS_PER_LIMB);
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
}
|
|
}
|
|
|
|
// x = x + y
|
|
// Assumes x, y are normalized, x + y < 2**(LIMBS*BITS_PER_LIMB) == 2**261
|
|
// Guarantees x is normalized
|
|
// Works properly even if &x == &y
|
|
void bn_add(bignum256 *x, const bignum256 *y) {
|
|
uint32_t acc = 0;
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += x->val[i] + y->val[i];
|
|
// acc doesn't overflow 32 bits
|
|
// Proof:
|
|
// acc + x[i] + y[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
|
|
// == (2**(32 - BITS_PER_LIMB) - 1) + 2**(BITS_PER_LIMB + 1) - 2
|
|
// == 7 + 2**30 - 2 < 2**32
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
|
|
// acc == x[:i + 1] + y[:i + 1] >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc == 0); // assert x + y < 2**261
|
|
// acc == 0
|
|
// Proof:
|
|
// acc == x[:LIMBS] + y[:LIMBS] >> LIMBS * BITS_PER_LIMB
|
|
// == x + y >> LIMBS * BITS_PER_LIMB
|
|
// <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> LIMBS * BITS_PER_LIMB == 0
|
|
}
|
|
|
|
// x = x + y % prime
|
|
// Assumes x, y are normalized
|
|
// Guarantees x is normalized and partly reduced modulo prime
|
|
// Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
|
|
void bn_addmod(bignum256 *x, const bignum256 *y, const bignum256 *prime) {
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
x->val[i] += y->val[i];
|
|
// x[i] doesn't overflow 32 bits
|
|
// Proof:
|
|
// x[i] + y[i]
|
|
// <= 2 * (2**BITS_PER_LIMB - 1)
|
|
// == 2**30 - 2 < 2**32
|
|
}
|
|
|
|
bn_fast_mod(x, prime);
|
|
}
|
|
|
|
// x = x + y
|
|
// Assumes x is normalized
|
|
// Assumes y <= 2**32 - 2**29 == 2**32 - 2**BITS_PER_LIMB and
|
|
// x + y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
|
|
// Guarantees x is normalized
|
|
void bn_addi(bignum256 *x, uint32_t y) {
|
|
// assert(y <= 3758096384); // assert y <= 2**32 - 2**29
|
|
uint32_t acc = y;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += x->val[i];
|
|
// acc doesn't overflow 32 bits
|
|
// Proof:
|
|
// if i == 0:
|
|
// acc + x[i] == y + x[0]
|
|
// <= (2**32 - 2**BITS_PER_LIMB) + (2**BITS_PER_LIMB - 1)
|
|
// == 2**32 - 1 < 2**32
|
|
// else:
|
|
// acc + x[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
|
|
// == 7 + 2**29 - 1 < 2**32
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= (BN_BITS_PER_LIMB);
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
|
|
// acc == x[:i + 1] + y >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc == 0); // assert x + y < 2**261
|
|
// acc == 0
|
|
// Proof:
|
|
// acc == x[:LIMBS] + y << LIMBS * BITS_PER_LIMB
|
|
// == x + y << LIMBS * BITS_PER_LIMB
|
|
// <= 2**(LIMBS + BITS_PER_LIMB) - 1 << LIMBS * BITS_PER_LIMB
|
|
// == 0
|
|
}
|
|
|
|
// x = x - y % prime
|
|
// Explicitly x = x + prime - y
|
|
// Assumes x, y are normalized
|
|
// Assumes y < prime[0], x + prime - y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
|
|
// Guarantees x is normalized
|
|
// If x is fully reduced modulo prime,
|
|
// guarantess x will be partly reduced modulo prime
|
|
// Assumes prime is nonzero and normalized
|
|
void bn_subi(bignum256 *x, uint32_t y, const bignum256 *prime) {
|
|
assert(y < prime->val[0]);
|
|
|
|
// x = x + prime - y
|
|
|
|
uint32_t acc = -y;
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += x->val[i] + prime->val[i];
|
|
// acc neither overflows 32 bits nor underflows 0
|
|
// Proof:
|
|
// acc + x[i] + prime[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
|
|
// <= 7 + 2**30 - 2 < 2**32
|
|
// acc + x[i] + prime[i]
|
|
// >= -y + prime[0] >= 0
|
|
|
|
x->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
|
|
// acc == x[:i + 1] + prime[:i + 1] - y >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc == 0); // assert x + prime - y < 2**261
|
|
// acc == 0
|
|
// Proof:
|
|
// acc == x[:LIMBS] + prime[:LIMBS] - y >> BITS_PER_LIMB * LIMBS
|
|
// == x + prime - y >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> BITS_PER_LIMB * LIMBS == 0
|
|
}
|
|
|
|
// res = x - y % prime
|
|
// Explicitly res = x + (2 * prime - y)
|
|
// Assumes x, y are normalized, y is partly reduced
|
|
// Assumes x + 2 * prime - y < 2**261 == 2**(BITS_PER_LIMB * LIMBS)
|
|
// Guarantees res is normalized
|
|
// Assumes prime is nonzero and normalized
|
|
void bn_subtractmod(const bignum256 *x, const bignum256 *y, bignum256 *res,
|
|
const bignum256 *prime) {
|
|
// res = x + (2 * prime - y)
|
|
|
|
uint32_t acc = 1;
|
|
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += (BN_BASE - 1) + x->val[i] + 2 * prime->val[i] - y->val[i];
|
|
// acc neither overflows 32 bits nor underflows 0
|
|
// Proof:
|
|
// acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
|
|
// >= (BASE - 1) - y[i]
|
|
// == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) == 0
|
|
// acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
|
|
// <= acc + (BASE - 1) + x[i] + 2 * prime[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
|
|
// (2**BITS_PER_LIMB - 1) + 2 * (2**BITS_PER_LIMB - 1)
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + 4 * (2**BITS_PER_LIMB - 1)
|
|
// == 7 + 4 * 2**29 - 4 == 2**31 + 3 < 2**32
|
|
|
|
res->val[i] = acc & (BN_BASE - 1);
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
|
|
// acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i+1] - y[:i+1] + 2*prime[:i+1]
|
|
// >> BITS_PER_LIMB * (i+1)
|
|
}
|
|
|
|
// assert(acc == 1); // assert x + 2 * prime - y < 2**261
|
|
|
|
// clang-format off
|
|
// acc == 1
|
|
// Proof:
|
|
// acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
|
|
// <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
|
|
// acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
|
|
// >= 2**(BITS_PER_LIMB * LIMBS) + 0 + 1 >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
// clang-format on
|
|
}
|
|
|
|
// res = x - y
|
|
// Assumes x, y are normalized and x >= y
|
|
// Guarantees res is normalized
|
|
// Works properly even if &x == &y or &x == &res or &y == &res or
|
|
// &x == &y == &res
|
|
void bn_subtract(const bignum256 *x, const bignum256 *y, bignum256 *res) {
|
|
uint32_t acc = 1;
|
|
for (int i = 0; i < BN_LIMBS; i++) {
|
|
acc += (BN_BASE - 1) + x->val[i] - y->val[i];
|
|
// acc neither overflows 32 bits nor underflows 0
|
|
// Proof:
|
|
// acc + (BASE - 1) + x[i] - y[i]
|
|
// >= (BASE - 1) - y == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
|
|
// == 0
|
|
// acc + (BASE - 1) + x[i] - y[i]
|
|
// <= acc + (BASE - 1) + x[i]
|
|
// <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
|
|
// (2**BITS_PER_LIMB - 1)
|
|
// == 7 + 2 * 2**29 < 2 **32
|
|
|
|
res->val[i] = acc & BN_LIMB_MASK;
|
|
acc >>= BN_BITS_PER_LIMB;
|
|
// acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
|
|
|
|
// acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i + 1] - y[:i + 1]
|
|
// >> BITS_PER_LIMB * (i + 1)
|
|
}
|
|
|
|
// assert(acc == 1); // assert x >= y
|
|
|
|
// clang-format off
|
|
// acc == 1
|
|
// Proof:
|
|
// acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x >> BITS_PER_LIMB * LIMBS
|
|
// <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
|
|
// <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
|
|
// acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
|
|
// == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
|
|
// >= 2**(BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * LIMBS
|
|
// == 1
|
|
}
|
|
|
|
// q = x // d, r = x % d
|
|
// Assumes x is normalized, 1 <= d <= 61304
|
|
// Guarantees q is normalized
|
|
void bn_long_division(bignum256 *x, uint32_t d, bignum256 *q, uint32_t *r) {
|
|
assert(1 <= d && d < 61304);
|
|
|
|
uint32_t acc = 0;
|
|
|
|
*r = x->val[BN_LIMBS - 1] % d;
|
|
q->val[BN_LIMBS - 1] = x->val[BN_LIMBS - 1] / d;
|
|
|
|
for (int i = BN_LIMBS - 2; i >= 0; i--) {
|
|
acc = *r * (BN_BASE % d) + x->val[i];
|
|
// acc doesn't overflow 32 bits
|
|
// Proof:
|
|
// r * (BASE % d) + x[i]
|
|
// <= (d - 1) * (d - 1) + (2**BITS_PER_LIMB - 1)
|
|
// == d**2 - 2*d + 2**BITS_PER_LIMB
|
|
// == 61304**2 - 2 * 61304 + 2**29
|
|
// == 3758057808 + 2**29 < 2**32
|
|
|
|
q->val[i] = *r * (BN_BASE / d) + (acc / d);
|
|
// q[i] doesn't overflow 32 bits
|
|
// Proof:
|
|
// r * (BASE // d) + (acc // d)
|
|
// <= (d - 1) * (2**BITS_PER_LIMB / d) +
|
|
// ((d**2 - 2*d + 2**BITS_PER_LIMB) / d)
|
|
// <= (d - 1) * (2**BITS_PER_LIMB / d) + (d - 2 + 2**BITS_PER_LIMB / d)
|
|
// == (d - 1 + 1) * (2**BITS_PER_LIMB / d) + d - 2
|
|
// == 2**BITS_PER_LIMB + d - 2 <= 2**29 + 61304 < 2**32
|
|
|
|
// q[i] == (r * BASE + x[i]) // d
|
|
// Proof:
|
|
// q[i] == r * (BASE // d) + (acc // d)
|
|
// == r * (BASE // d) + (r * (BASE % d) + x[i]) // d
|
|
// == (r * d * (BASE // d) + r * (BASE % d) + x[i]) // d
|
|
// == (r * (d * (BASE // d) + (BASE % d)) + x[i]) // d
|
|
// == (r * BASE + x[i]) // d
|
|
|
|
// q[i] < 2**BITS_PER_LIMB
|
|
// Proof:
|
|
// q[i] == (r * BASE + x[i]) // d
|
|
// <= ((d - 1) * 2**BITS_PER_LIMB + (2**BITS_PER_LIMB - 1)) / d
|
|
// == (d * 2**BITS_PER_LIMB - 1) / d == 2**BITS_PER_LIMB - 1 / d
|
|
// < 2**BITS_PER_LIMB
|
|
|
|
*r = acc % d;
|
|
// r == (r * BASE + x[i]) % d
|
|
// Proof:
|
|
// r == acc % d == (r * (BASE % d) + x[i]) % d
|
|
// == (r * BASE + x[i]) % d
|
|
|
|
// x[:i] == q[:i] * d + r
|
|
}
|
|
}
|
|
|
|
// x = x // 58, r = x % 58
|
|
// Assumes x is normalized
|
|
// Guarantees x is normalized
|
|
void bn_divmod58(bignum256 *x, uint32_t *r) { bn_long_division(x, 58, x, r); }
|
|
|
|
// x = x // 1000, r = x % 1000
|
|
// Assumes x is normalized
|
|
// Guarantees x is normalized
|
|
void bn_divmod1000(bignum256 *x, uint32_t *r) {
|
|
bn_long_division(x, 1000, x, r);
|
|
}
|
|
|
|
// x = x // 10, r = x % 10
|
|
// Assumes x is normalized
|
|
// Guarantees x is normalized
|
|
void bn_divmod10(bignum256 *x, uint32_t *r) { bn_long_division(x, 10, x, r); }
|
|
|
|
// Formats amount
|
|
// Assumes amount is normalized
|
|
// Assumes prefix and suffix are null-terminated strings
|
|
// Assumes output is an array of length output_length
|
|
// The function doesn't have neither constant control flow nor constant memory
|
|
// access flow with regard to any its argument
|
|
size_t bn_format(const bignum256 *amount, const char *prefix, const char *suffix, unsigned int decimals, int exponent, bool trailing, char *output, size_t output_length) {
|
|
|
|
/*
|
|
Python prototype of the function:
|
|
|
|
def format(amount, prefix, suffix, decimals, exponent, trailing):
|
|
if exponent >= 0:
|
|
amount *= 10 ** exponent
|
|
else:
|
|
amount //= 10 ** (-exponent)
|
|
|
|
d = pow(10, decimals)
|
|
|
|
if decimals:
|
|
output = "%d.%0*d" % (amount // d, decimals, amount % d)
|
|
if not trailing:
|
|
output = output.rstrip("0").rstrip(".")
|
|
else:
|
|
output = "%d" % (amount // d)
|
|
|
|
return prefix + output + suffix
|
|
*/
|
|
|
|
// Auxiliary macro for bn_format
|
|
// If enough space adds one character to output starting from the end
|
|
#define BN_FORMAT_ADD_OUTPUT_CHAR(c) \
|
|
{ \
|
|
--position; \
|
|
if (output <= position && position < output + output_length) { \
|
|
*position = (c); \
|
|
} else { \
|
|
memset(output, '\0', output_length); \
|
|
return 0; \
|
|
} \
|
|
}
|
|
|
|
bignum256 temp = {0};
|
|
bn_copy(amount, &temp);
|
|
uint32_t digit = 0;
|
|
|
|
char *position = output + output_length;
|
|
|
|
// Add string ending character
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('\0');
|
|
|
|
// Add suffix
|
|
size_t suffix_length = suffix ? strlen(suffix) : 0;
|
|
for (int i = suffix_length - 1; i >= 0; --i)
|
|
BN_FORMAT_ADD_OUTPUT_CHAR(suffix[i])
|
|
|
|
// amount //= 10**exponent
|
|
for (; exponent < 0; ++exponent) {
|
|
// if temp == 0, there is no need to divide it by 10 anymore
|
|
if (bn_is_zero(&temp)) {
|
|
exponent = 0;
|
|
break;
|
|
}
|
|
bn_divmod10(&temp, &digit);
|
|
}
|
|
|
|
// exponent >= 0 && decimals >= 0
|
|
|
|
bool fractional_part = false; // is fractional-part of amount present
|
|
|
|
{ // Add fractional-part digits of amount
|
|
// Add trailing zeroes
|
|
unsigned int trailing_zeros = decimals < (unsigned int) exponent ? decimals : (unsigned int) exponent;
|
|
// When casting a negative int to unsigned int, UINT_MAX is added to the int before
|
|
// Since exponent >= 0, the value remains unchanged
|
|
decimals -= trailing_zeros;
|
|
exponent -= trailing_zeros;
|
|
|
|
if (trailing && trailing_zeros) {
|
|
fractional_part = true;
|
|
for (; trailing_zeros > 0; --trailing_zeros)
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('0')
|
|
}
|
|
|
|
// exponent == 0 || decimals == 0
|
|
|
|
// Add significant digits and leading zeroes
|
|
for (; decimals > 0; --decimals) {
|
|
bn_divmod10(&temp, &digit);
|
|
|
|
if (fractional_part || digit || trailing) {
|
|
fractional_part = true;
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
|
|
}
|
|
else if (bn_is_zero(&temp)) {
|
|
// We break since the remaining digits are zeroes and fractional_part == trailing == false
|
|
decimals = 0;
|
|
break;
|
|
}
|
|
}
|
|
// decimals == 0
|
|
}
|
|
|
|
if (fractional_part) {
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('.')
|
|
}
|
|
|
|
{ // Add integer-part digits of amount
|
|
// Add trailing zeroes
|
|
if (!bn_is_zero(&temp)) {
|
|
for (; exponent > 0; --exponent) {
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('0')
|
|
}
|
|
}
|
|
// decimals == 0 && exponent == 0
|
|
|
|
// Add significant digits
|
|
do {
|
|
bn_divmod10(&temp, &digit);
|
|
BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
|
|
} while (!bn_is_zero(&temp));
|
|
}
|
|
|
|
// Add prefix
|
|
size_t prefix_length = prefix ? strlen(prefix) : 0;
|
|
for (int i = prefix_length - 1; i >= 0; --i)
|
|
BN_FORMAT_ADD_OUTPUT_CHAR(prefix[i])
|
|
|
|
// Move formatted amount to the start of output
|
|
int length = output - position + output_length;
|
|
memmove(output, position, length);
|
|
return length - 1;
|
|
}
|
|
|
|
#if USE_BN_PRINT
|
|
// Prints x in hexadecimal
|
|
// Assumes x is normalized and x < 2**256
|
|
void bn_print(const bignum256 *x) {
|
|
printf("%06x", x->val[8]);
|
|
printf("%08x", ((x->val[7] << 3) | (x->val[6] >> 26)));
|
|
printf("%07x", ((x->val[6] << 2) | (x->val[5] >> 27)) & 0x0FFFFFFF);
|
|
printf("%07x", ((x->val[5] << 1) | (x->val[4] >> 28)) & 0x0FFFFFFF);
|
|
printf("%07x", x->val[4] & 0x0FFFFFFF);
|
|
printf("%08x", ((x->val[3] << 3) | (x->val[2] >> 26)));
|
|
printf("%07x", ((x->val[2] << 2) | (x->val[1] >> 27)) & 0x0FFFFFFF);
|
|
printf("%07x", ((x->val[1] << 1) | (x->val[0] >> 28)) & 0x0FFFFFFF);
|
|
printf("%07x", x->val[0] & 0x0FFFFFFF);
|
|
}
|
|
|
|
// Prints comma separated list of limbs of x
|
|
void bn_print_raw(const bignum256 *x) {
|
|
for (int i = 0; i < BN_LIMBS - 1; i++) {
|
|
printf("0x%08x, ", x->val[i]);
|
|
}
|
|
printf("0x%08x", x->val[BN_LIMBS - 1]);
|
|
}
|
|
#endif
|
|
|
|
#if USE_INVERSE_FAST
|
|
void bn_inverse(bignum256 *x, const bignum256 *prime) {
|
|
bn_inverse_fast(x, prime);
|
|
}
|
|
#else
|
|
void bn_inverse(bignum256 *x, const bignum256 *prime) {
|
|
bn_inverse_slow(x, prime);
|
|
}
|
|
#endif
|