mirror of
https://github.com/trezor/trezor-firmware.git
synced 2024-11-24 16:38:15 +00:00
336 lines
9.7 KiB
C
336 lines
9.7 KiB
C
/*
|
|
* Implementation of the hazardous parts of the SSS library
|
|
*
|
|
* Copyright (c) 2017 Daan Sprenkels <hello@dsprenkels.com>
|
|
* Copyright (c) 2019 SatoshiLabs
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
|
|
* OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* This code contains the actual Shamir secret sharing functionality. The
|
|
* implementation of this code is based on the idea that the user likes to
|
|
* generate/combine 32 shares (in GF(2^8)) at the same time, because a 256 bit
|
|
* key will be exactly 32 bytes. Therefore we bitslice all the input and
|
|
* unbitslice the output right before returning.
|
|
*
|
|
* This bitslice approach optimizes natively on all architectures that are 32
|
|
* bit or more. Care is taken to use not too many registers, to ensure that no
|
|
* values have to be leaked to the stack.
|
|
*
|
|
* All functions in this module are implemented as constant-time,
* constant-lookup operations, as all proper crypto code must be.
|
|
*/
|
|
|
|
#include "shamir.h"
|
|
#include <string.h>
|
|
#include "memzero.h"
|
|
|
|
/*
 * Transpose `len` input bytes into bitsliced form: bit `k` of byte `x[i]`
 * becomes bit `i` of word `r[k]`. All eight words of `r` are cleared first,
 * so bit positions at or above `len` end up zero.
 *
 * `len` must not exceed 32: every input byte occupies one bit position of a
 * 32-bit word, and a larger index would shift past the word width.
 */
static void bitslice(uint32_t r[8], const uint8_t *x, size_t len) {
  memset(r, 0, 8 * sizeof(uint32_t));

  for (size_t lane = 0; lane < len; lane++) {
    const uint32_t byte = x[lane];
    for (size_t plane = 0; plane < 8; plane++) {
      const uint32_t bit = (byte >> plane) & 1;
      r[plane] |= bit << lane;
    }
  }
}
|
|
|
|
/*
 * Inverse of the bitslice transposition: bit `i` of word `x[k]` becomes bit
 * `k` of byte `r[i]`. Exactly `len` bytes of `r` are written; `len` must not
 * exceed 32 because each output byte is gathered from one bit position of
 * the 32-bit words.
 */
static void unbitslice(uint8_t *r, const uint32_t x[8], size_t len) {
  for (size_t lane = 0; lane < len; lane++) {
    uint8_t byte = 0;
    for (size_t plane = 0; plane < 8; plane++) {
      byte |= (uint8_t)(((x[plane] >> lane) & 1) << plane);
    }
    r[lane] = byte;
  }
}
|
|
|
|
/*
 * Broadcast the single byte `x` to every position of a bitsliced value:
 * word `r[k]` becomes all ones when bit `k` of `x` is set and all zeros
 * otherwise.
 */
static void bitslice_setall(uint32_t r[8], const uint8_t x) {
  for (size_t plane = 0; plane < 8; plane++) {
    const uint32_t bit = (uint32_t)(x >> plane) & 1;
    /* Unsigned wrap-around: 0 - 0 = 0x00000000, 0 - 1 = 0xFFFFFFFF. */
    r[plane] = (uint32_t)0 - bit;
  }
}
|
|
|
|
/*
 * Field addition in GF(2^8) on bitsliced operands: XOR `x` into `r`,
 * word by word, leaving the sum in `r`.
 */
static void gf256_add(uint32_t r[8], const uint32_t x[8]) {
  r[0] ^= x[0];
  r[1] ^= x[1];
  r[2] ^= x[2];
  r[3] ^= x[3];
  r[4] ^= x[4];
  r[5] ^= x[5];
  r[6] ^= x[6];
  r[7] ^= x[7];
}
|
|
|
|
/*
 * Multiply the bitsliced GF(2^8) elements `a` and `b`, reducing by
 * x^8 + x^4 + x^3 + x + 1, and store the product in `r`.
 *
 * `r` may alias `a`, because `a` is copied to a scratch buffer before `r` is
 * written. `r` must NOT alias `b`: `b` is still being read while `r` is
 * updated, so the result would be wrong. To square a value use
 * `gf256_square` instead.
 */
static void gf256_mul(uint32_t r[8], const uint32_t a[8], const uint32_t b[8]) {
  /* Russian Peasant (shift-and-add) multiplication, written out by hand.
   *
   * Round k adds (a * x^k) & b[k] into r and then doubles the working copy.
   * A pair of nested loops would be far shorter, but some compilers fail to
   * optimize such loops well, so the eight rounds are fully unrolled.
   *
   * Doubling never moves any words. The working copy `t` is treated as a
   * ring instead: after k doublings the coefficient of x^i is held in
   * t[(i - k) mod 8]. The old top coefficient therefore already sits where
   * the new x^0 coefficient belongs, and reduction only has to XOR it into
   * the words that hold the new x^1, x^3 and x^4 coefficients.
   */
  uint32_t t[8] = {0};
  memcpy(t, a, sizeof(t));

  /* Round 0: r = a & b[0] (plain assignment, previous contents discarded). */
  r[0] = t[0] & b[0];
  r[1] = t[1] & b[0];
  r[2] = t[2] & b[0];
  r[3] = t[3] & b[0];
  r[4] = t[4] & b[0];
  r[5] = t[5] & b[0];
  r[6] = t[6] & b[0];
  r[7] = t[7] & b[0];
  /* Double: top coefficient is t[7]. */
  t[0] ^= t[7];
  t[2] ^= t[7];
  t[3] ^= t[7];

  /* Round 1: coefficient of x^0 is now in t[7]. */
  r[0] ^= t[7] & b[1];
  r[1] ^= t[0] & b[1];
  r[2] ^= t[1] & b[1];
  r[3] ^= t[2] & b[1];
  r[4] ^= t[3] & b[1];
  r[5] ^= t[4] & b[1];
  r[6] ^= t[5] & b[1];
  r[7] ^= t[6] & b[1];
  /* Double: top coefficient is t[6]. */
  t[7] ^= t[6];
  t[1] ^= t[6];
  t[2] ^= t[6];

  /* Round 2: coefficient of x^0 is now in t[6]. */
  r[0] ^= t[6] & b[2];
  r[1] ^= t[7] & b[2];
  r[2] ^= t[0] & b[2];
  r[3] ^= t[1] & b[2];
  r[4] ^= t[2] & b[2];
  r[5] ^= t[3] & b[2];
  r[6] ^= t[4] & b[2];
  r[7] ^= t[5] & b[2];
  /* Double: top coefficient is t[5]. */
  t[6] ^= t[5];
  t[0] ^= t[5];
  t[1] ^= t[5];

  /* Round 3: coefficient of x^0 is now in t[5]. */
  r[0] ^= t[5] & b[3];
  r[1] ^= t[6] & b[3];
  r[2] ^= t[7] & b[3];
  r[3] ^= t[0] & b[3];
  r[4] ^= t[1] & b[3];
  r[5] ^= t[2] & b[3];
  r[6] ^= t[3] & b[3];
  r[7] ^= t[4] & b[3];
  /* Double: top coefficient is t[4]. */
  t[5] ^= t[4];
  t[7] ^= t[4];
  t[0] ^= t[4];

  /* Round 4: coefficient of x^0 is now in t[4]. */
  r[0] ^= t[4] & b[4];
  r[1] ^= t[5] & b[4];
  r[2] ^= t[6] & b[4];
  r[3] ^= t[7] & b[4];
  r[4] ^= t[0] & b[4];
  r[5] ^= t[1] & b[4];
  r[6] ^= t[2] & b[4];
  r[7] ^= t[3] & b[4];
  /* Double: top coefficient is t[3]. */
  t[4] ^= t[3];
  t[6] ^= t[3];
  t[7] ^= t[3];

  /* Round 5: coefficient of x^0 is now in t[3]. */
  r[0] ^= t[3] & b[5];
  r[1] ^= t[4] & b[5];
  r[2] ^= t[5] & b[5];
  r[3] ^= t[6] & b[5];
  r[4] ^= t[7] & b[5];
  r[5] ^= t[0] & b[5];
  r[6] ^= t[1] & b[5];
  r[7] ^= t[2] & b[5];
  /* Double: top coefficient is t[2]. */
  t[3] ^= t[2];
  t[5] ^= t[2];
  t[6] ^= t[2];

  /* Round 6: coefficient of x^0 is now in t[2]. */
  r[0] ^= t[2] & b[6];
  r[1] ^= t[3] & b[6];
  r[2] ^= t[4] & b[6];
  r[3] ^= t[5] & b[6];
  r[4] ^= t[6] & b[6];
  r[5] ^= t[7] & b[6];
  r[6] ^= t[0] & b[6];
  r[7] ^= t[1] & b[6];
  /* Double: top coefficient is t[1]. */
  t[2] ^= t[1];
  t[4] ^= t[1];
  t[5] ^= t[1];

  /* Round 7: coefficient of x^0 is now in t[1]; no further doubling needed. */
  r[0] ^= t[1] & b[7];
  r[1] ^= t[2] & b[7];
  r[2] ^= t[3] & b[7];
  r[3] ^= t[4] & b[7];
  r[4] ^= t[5] & b[7];
  r[5] ^= t[6] & b[7];
  r[6] ^= t[7] & b[7];
  r[7] ^= t[0] & b[7];

  /* The scratch copy is derived from the operands; wipe it before returning. */
  memzero(t, sizeof(t));
}
|
|
|
|
/*
 * Square the bitsliced GF(2^8) element `x` and store the result in `r`.
 * `r` and `x` may be the same array.
 */
static void gf256_square(uint32_t r[8], const uint32_t x[8]) {
  /* Freshman's Dream: in characteristic 2, squaring a polynomial just moves
   * the coefficient of x^i to x^(2i). The four upper coefficients land on
   * degrees 8..14 and are kept in locals; they are read before any word of
   * `r` is written, and the lower ones are moved from the highest index
   * down, so an in-place call (`gf256_square(v, v)`) never clobbers an input
   * word that is still needed.
   */
  uint32_t t14 = x[7];
  uint32_t t12 = x[6];
  uint32_t t10 = x[5];
  uint32_t t8 = x[4];
  r[6] = x[3];
  r[4] = x[2];
  r[2] = x[1];
  r[0] = x[0];

  /* Fold the high terms down with x^8 = x^4 + x^3 + x + 1. The odd
   * coefficients t13 and t11 are always zero and are skipped. */

  /* x^14 -> x^10 + x^9 + x^7 + x^6 (the x^9 share is handled below). */
  r[7] = t14; /* first write to r[7] */
  r[6] ^= t14;
  t10 ^= t14;

  /* x^12 -> x^8 + x^7 + x^5 + x^4 */
  r[4] ^= t12;
  r[5] = t12; /* first write to r[5] */
  r[7] ^= t12;
  t8 ^= t12;

  /* x^10 -> x^6 + x^5 + x^3 + x^2 */
  r[2] ^= t10;
  r[3] = t10; /* first write to r[3] */
  r[5] ^= t10;
  r[6] ^= t10;

  /* x^9 -> x^5 + x^4 + x^2 + x; its coefficient always equals the original
   * x^14 coefficient, so t14 is reused instead of a separate variable. */
  r[1] = t14; /* first write to r[1] */
  r[2] ^= t14;
  r[4] ^= t14;
  r[5] ^= t14;

  /* x^8 -> x^4 + x^3 + x + 1 */
  r[0] ^= t8;
  r[1] ^= t8;
  r[3] ^= t8;
  r[4] ^= t8;
}
|
|
|
|
/*
 * Invert the bitsliced GF(2^8) element `x` and write the result to `r`.
 *
 * The inverse is computed as x^254 with a fixed sequence of squarings and
 * multiplications, so the same operations run for every input. A position
 * holding zero, which has no inverse, yields zero.
 *
 * `x` is only read. `r` must NOT alias `x`: `x` is read again after `r` has
 * been written, so an in-place call would give a wrong result.
 */
static void gf256_inv(uint32_t r[8], const uint32_t x[8]) {
  uint32_t y[8] = {0}, z[8] = {0};

  gf256_square(y, x);  // y = x^2
  gf256_square(y, y);  // y = x^4
  gf256_square(r, y);  // r = x^8
  gf256_mul(z, r, x);  // z = x^9
  gf256_square(r, r);  // r = x^16
  gf256_mul(r, r, z);  // r = x^25
  gf256_square(r, r);  // r = x^50
  gf256_square(z, r);  // z = x^100
  gf256_square(z, z);  // z = x^200
  gf256_mul(r, r, z);  // r = x^250
  gf256_mul(r, r, y);  // r = x^254

  /* Wipe the intermediate powers of x before returning. */
  memzero(y, sizeof(y));
  memzero(z, sizeof(z));
}
|
|
|
|
bool shamir_interpolate(uint8_t *result, uint8_t result_index,
|
|
const uint8_t *share_indices,
|
|
const uint8_t **share_values, uint8_t share_count,
|
|
size_t len) {
|
|
size_t i = 0, j = 0;
|
|
uint32_t x[8] = {0};
|
|
uint32_t xs[share_count][8];
|
|
memset(xs, 0, sizeof(xs));
|
|
uint32_t ys[share_count][8];
|
|
memset(ys, 0, sizeof(ys));
|
|
uint32_t num[8] = {~0}; /* num is the numerator (=1) */
|
|
uint32_t denom[8] = {0};
|
|
uint32_t tmp[8] = {0};
|
|
uint32_t secret[8] = {0};
|
|
bool ret = true;
|
|
|
|
if (len > SHAMIR_MAX_LEN) return false;
|
|
|
|
/* Collect the x and y values */
|
|
for (i = 0; i < share_count; i++) {
|
|
bitslice_setall(xs[i], share_indices[i]);
|
|
bitslice(ys[i], share_values[i], len);
|
|
}
|
|
bitslice_setall(x, result_index);
|
|
|
|
for (i = 0; i < share_count; i++) {
|
|
memcpy(tmp, x, sizeof(uint32_t[8]));
|
|
gf256_add(tmp, xs[i]);
|
|
gf256_mul(num, num, tmp);
|
|
}
|
|
|
|
/* Use Lagrange basis polynomials to calculate the secret coefficient */
|
|
for (i = 0; i < share_count; i++) {
|
|
/* The code below assumes that none of the share_indices are equal to
|
|
* result_index. We need to treat that as a special case. */
|
|
if (share_indices[i] != result_index) {
|
|
memcpy(denom, x, sizeof(denom));
|
|
gf256_add(denom, xs[i]);
|
|
} else {
|
|
bitslice_setall(denom, 1);
|
|
gf256_add(secret, ys[i]);
|
|
}
|
|
for (j = 0; j < share_count; j++) {
|
|
if (i == j) continue;
|
|
memcpy(tmp, xs[i], sizeof(uint32_t[8]));
|
|
gf256_add(tmp, xs[j]);
|
|
gf256_mul(denom, denom, tmp);
|
|
}
|
|
if ((denom[0] | denom[1] | denom[2] | denom[3] | denom[4] | denom[5] |
|
|
denom[6] | denom[7]) == 0) {
|
|
/* The share_indices are not unique. */
|
|
ret = false;
|
|
break;
|
|
}
|
|
gf256_inv(tmp, denom); /* inverted denominator */
|
|
gf256_mul(tmp, tmp, num); /* basis polynomial */
|
|
gf256_mul(tmp, tmp, ys[i]); /* scaled coefficient */
|
|
gf256_add(secret, tmp);
|
|
}
|
|
|
|
if (ret == true) {
|
|
unbitslice(result, secret, len);
|
|
}
|
|
|
|
memzero(x, sizeof(x));
|
|
memzero(xs, sizeof(xs));
|
|
memzero(ys, sizeof(ys));
|
|
memzero(num, sizeof(num));
|
|
memzero(denom, sizeof(denom));
|
|
memzero(tmp, sizeof(tmp));
|
|
memzero(secret, sizeof(secret));
|
|
return ret;
|
|
}
|