1
0
mirror of https://github.com/trezor/trezor-firmware.git synced 2024-11-22 07:28:10 +00:00

simplify ed25519 code

This commit is contained in:
Pavol Rusnak 2017-03-28 18:32:58 +02:00
parent cb471ba2ec
commit 397a13f654
No known key found for this signature in database
GPG Key ID: 91F3B339B9A02A3D
19 changed files with 15 additions and 5072 deletions

View File

@@ -26,10 +26,6 @@ CFLAGS += $(OPTFLAGS) \
# disable sequence point warning because of AES code
CFLAGS += -Wno-sequence-point
CFLAGS += -DED25519_CUSTOMRANDOM=1
CFLAGS += -DED25519_CUSTOMHASH=1
CFLAGS += -DED25519_NO_INLINE_ASM
CFLAGS += -DED25519_FORCE_32BIT=1
CFLAGS += -Ied25519-donna -Icurve25519-donna -I.
CFLAGS += -DUSE_ETHEREUM=1
CFLAGS += -DUSE_GRAPHENE=1

View File

@@ -1,413 +0,0 @@
/*
Public domain by Adam Langley <agl@imperialviolet.org> &
Andrew M. <liquidsun@gmail.com>
See: https://github.com/floodyberry/curve25519-donna
64bit integer curve25519 implementation
*/
/* A field element of GF(2^255-19) in unpacked ("polynomial") form:
 * five 64-bit limbs of 51 bits each; limb i carries bits 51*i .. 51*i+50.
 * The spare 13 bits per limb give headroom for lazy (delayed) reduction. */
typedef uint64_t bignum25519[5];
/* Masks selecting the low 40/51/56 bits of a 64-bit word. */
static const uint64_t reduce_mask_40 = ((uint64_t)1 << 40) - 1;
static const uint64_t reduce_mask_51 = ((uint64_t)1 << 51) - 1;
static const uint64_t reduce_mask_56 = ((uint64_t)1 << 56) - 1;
/* out = in (plain limb-wise copy; no reduction performed) */
DONNA_INLINE static void
curve25519_copy(bignum25519 out, const bignum25519 in) {
	int i;
	for (i = 0; i < 5; i++)
		out[i] = in[i];
}
/* out = a + b, limb-wise.  No carry propagation: the 13 spare bits per
 * 51-bit limb absorb the overflow until a later *_reduce call. */
DONNA_INLINE static void
curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
	int i;
	for (i = 0; i < 5; i++)
		out[i] = a[i] + b[i];
}
/* out = a + b, where a and/or b are the result of a basic op (add,sub).
 * In this 64-bit representation the body is identical to curve25519_add;
 * it is kept as a separate entry point so call sites document whether
 * their operands are already reduced. */
DONNA_INLINE static void
curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
	int i;
	for (i = 0; i < 5; i++)
		out[i] = a[i] + b[i];
}
/* out = a + b, fully carried: each limb is brought back below 2^51 and the
 * final carry out of limb 4 is folded into limb 0 multiplied by 19, since
 * 2^255 ≡ 19 (mod 2^255-19). */
DONNA_INLINE static void
curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
uint64_t c;
out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
out[0] += c * 19;
}
/* multiples of p */
/* 2p and 4p in radix-2^51 limbs (limb 0 differs because p = 2^255 - 19).
 * A multiple of p is added to the minuend before a subtraction so that no
 * limb difference ever underflows; the result stays congruent mod p. */
static const uint64_t twoP0 = 0x0fffffffffffda;
static const uint64_t twoP1234 = 0x0ffffffffffffe;
static const uint64_t fourP0 = 0x1fffffffffffb4;
static const uint64_t fourP1234 = 0x1ffffffffffffc;
/* out = a - b, computed as a + 2p - b so limbs cannot underflow.
 * The result is congruent mod p but left unreduced. */
DONNA_INLINE static void
curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
	int i;
	out[0] = a[0] + twoP0 - b[0];
	for (i = 1; i < 5; i++)
		out[i] = a[i] + twoP1234 - b[i];
}
/* out = a - b, where a and/or b are the result of a basic op (add,sub):
 * such operands may be unreduced, so 4p (rather than 2p) is added to keep
 * every limb difference non-negative. */
DONNA_INLINE static void
curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
	int i;
	out[0] = a[0] + fourP0 - b[0];
	for (i = 1; i < 5; i++)
		out[i] = a[i] + fourP1234 - b[i];
}
/* out = a - b, fully carried.  4p is added so the subtraction is safe even
 * when the inputs are unreduced results of earlier add/sub; the carry out
 * of limb 4 folds into limb 0 via *19 (2^255 ≡ 19 mod p). */
DONNA_INLINE static void
curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
uint64_t c;
out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
out[0] += c * 19;
}
/* out = -a */
/* Computed as 2p - a with an inline carry pass; the final carry folds into
 * limb 0 via *19.  Assumes a is reduced enough that 2p - a[i] does not
 * underflow (i.e. a came from a *_reduce/expand, not a raw add). */
DONNA_INLINE static void
curve25519_neg(bignum25519 out, const bignum25519 a) {
uint64_t c;
out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
out[0] += c * 19;
}
/* out = a * b */
/* Schoolbook 5x5 limb multiply into 128-bit columns t[0..4].  Columns that
 * would land at or above 2^255 are folded back by multiplying the high
 * operand limbs by 19 (2^255 ≡ 19 mod p), then one carry pass reduces each
 * column to a 51-bit limb.  Both inputs are fully read into locals before
 * out is written, so out may alias in or in2.
 * lo128/shr128/add128/add128_64/mul64x64_128 are uint128 helper macros
 * defined elsewhere in the project. */
DONNA_INLINE static void
curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
#if !defined(HAVE_NATIVE_UINT128)
uint128_t mul;
#endif
uint128_t t[5];
uint64_t r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
r0 = in[0];
r1 = in[1];
r2 = in[2];
r3 = in[3];
r4 = in[4];
s0 = in2[0];
s1 = in2[1];
s2 = in2[2];
s3 = in2[3];
s4 = in2[4];
/* low partial products: limb pairs whose indices sum to < 5 */
#if defined(HAVE_NATIVE_UINT128)
t[0] = ((uint128_t) r0) * s0;
t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;
#else
mul64x64_128(t[0], r0, s0)
mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)
mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)
mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)
mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)
#endif
/* high partial products wrap past 2^255: fold with factor 19 */
r1 *= 19;
r2 *= 19;
r3 *= 19;
r4 *= 19;
#if defined(HAVE_NATIVE_UINT128)
t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
t[3] += ((uint128_t) r4) * s4;
#else
mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)
mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)
mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)
mul64x64_128(mul, r4, s4) add128(t[3], mul)
#endif
/* carry pass: reduce each 128-bit column to 51 bits, fold top carry */
r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
r1 += c;
out[0] = r0;
out[1] = r1;
out[2] = r2;
out[3] = r3;
out[4] = r4;
}
/* Out-of-line wrapper around curve25519_mul; DONNA_NOINLINE keeps the large
 * multiply body from being duplicated at every call site. */
DONNA_NOINLINE static void
curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
curve25519_mul(out, in2, in);
}
/* out = in^(2^count): square `in` repeatedly, `count` times.
 * (The upstream comment "in^(2 * count)" understates it — the do/while
 * squares once per iteration.)  count must be >= 1.
 * Same column/fold-by-19 scheme as curve25519_mul, with the symmetric
 * cross terms pre-doubled (d0,d1,d2,d4). */
DONNA_NOINLINE static void
curve25519_square_times(bignum25519 out, const bignum25519 in, uint64_t count) {
#if !defined(HAVE_NATIVE_UINT128)
uint128_t mul;
#endif
uint128_t t[5];
uint64_t r0,r1,r2,r3,r4,c;
uint64_t d0,d1,d2,d4,d419;
r0 = in[0];
r1 = in[1];
r2 = in[2];
r3 = in[3];
r4 = in[4];
do {
/* doubled / 19-folded copies of the limbs for the cross terms */
d0 = r0 * 2;
d1 = r1 * 2;
d2 = r2 * 2 * 19;
d419 = r4 * 19;
d4 = d419 * 2;
#if defined(HAVE_NATIVE_UINT128)
t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 ));
t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 ));
t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 ));
t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 ));
#else
mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
#endif
/* carry pass; shl128(c, t, 13) presumably yields the high 64 bits of
 * (t << 13), i.e. bits 51..114 of the column (13 = 64 - 51) — the macro
 * is defined elsewhere, confirm there. */
r0 = lo128(t[0]) & reduce_mask_51;
r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c;
r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c;
r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c;
r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c;
shl128(c, t[4], 13); r0 += c * 19;
c = r0 >> 51; r0 &= reduce_mask_51;
r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51;
r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51;
r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51;
r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51;
r0 += c * 19;
} while(--count);
out[0] = r0;
out[1] = r1;
out[2] = r2;
out[3] = r3;
out[4] = r4;
}
/* out = in * in: a single squaring.  Identical column layout to
 * curve25519_square_times but with the curve25519_mul-style shr128/
 * add128_64 carry chain instead of the shl128 variant. */
DONNA_INLINE static void
curve25519_square(bignum25519 out, const bignum25519 in) {
#if !defined(HAVE_NATIVE_UINT128)
uint128_t mul;
#endif
uint128_t t[5];
uint64_t r0,r1,r2,r3,r4,c;
uint64_t d0,d1,d2,d4,d419;
r0 = in[0];
r1 = in[1];
r2 = in[2];
r3 = in[3];
r4 = in[4];
/* doubled / 19-folded copies of the limbs for the cross terms */
d0 = r0 * 2;
d1 = r1 * 2;
d2 = r2 * 2 * 19;
d419 = r4 * 19;
d4 = d419 * 2;
#if defined(HAVE_NATIVE_UINT128)
t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 ));
t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 ));
t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 ));
t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 ));
#else
mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)
mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)
mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)
mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)
#endif
/* carry pass: reduce each 128-bit column to 51 bits, fold top carry */
r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
r1 += c;
out[0] = r0;
out[1] = r1;
out[2] = r2;
out[3] = r3;
out[4] = r4;
}
/* Take a little-endian, 32-byte number and expand it into polynomial form
 * (five 51-bit limbs).
 *
 * Fix: the previous little-endian fast path read the byte buffer through
 * (uint64_t *) casts — undefined behavior under C strict-aliasing rules,
 * and a misaligned access on alignment-strict targets, since `in` is only
 * a byte pointer.  The portable byte-assembly below is used unconditionally;
 * it yields identical output on every platform (optimizing compilers turn
 * it into single 64-bit loads on little-endian targets anyway). */
DONNA_INLINE static void
curve25519_expand(bignum25519 out, const unsigned char *in) {
	uint64_t x0,x1,x2,x3;
	/* assemble four little-endian 64-bit words from bytes */
	#define F(s) \
		((((uint64_t)in[s + 0])      ) | \
		 (((uint64_t)in[s + 1]) <<  8) | \
		 (((uint64_t)in[s + 2]) << 16) | \
		 (((uint64_t)in[s + 3]) << 24) | \
		 (((uint64_t)in[s + 4]) << 32) | \
		 (((uint64_t)in[s + 5]) << 40) | \
		 (((uint64_t)in[s + 6]) << 48) | \
		 (((uint64_t)in[s + 7]) << 56))
	x0 = F(0);
	x1 = F(8);
	x2 = F(16);
	x3 = F(24);
	/* split the 4x64-bit words into 5x51-bit limbs; each step shifts the
	 * remaining high bits down and pulls in the next word's low bits */
	out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);
	out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);
	out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);
	out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);
	out[4] = x3 & reduce_mask_51;
}
/* Take a fully reduced polynomial form number and contract it into a
 * little-endian, 32-byte array
 */
/* Branch-free canonicalization: two full carry passes bring t below 2^255,
 * then the +19 / offset-by-2^51 trick selects the unique representative in
 * [0, p) without comparing against p, and the 255 bits are serialized in
 * four 64-bit little-endian chunks. */
DONNA_INLINE static void
curve25519_contract(unsigned char *out, const bignum25519 input) {
uint64_t t[5];
uint64_t f, i;
t[0] = input[0];
t[1] = input[1];
t[2] = input[2];
t[3] = input[3];
t[4] = input[4];
#define curve25519_contract_carry() \
t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \
t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \
t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \
t[4] += t[3] >> 51; t[3] &= reduce_mask_51;
#define curve25519_contract_carry_full() curve25519_contract_carry() \
t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;
#define curve25519_contract_carry_final() curve25519_contract_carry() \
t[4] &= reduce_mask_51;
curve25519_contract_carry_full()
curve25519_contract_carry_full()
/* now t is between 0 and 2^255-1, properly carried. */
/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
t[0] += 19;
curve25519_contract_carry_full()
/* now between 19 and 2^255-1 in both cases, and offset by 19. */
t[0] += (reduce_mask_51 + 1) - 19;
t[1] += (reduce_mask_51 + 1) - 1;
t[2] += (reduce_mask_51 + 1) - 1;
t[3] += (reduce_mask_51 + 1) - 1;
t[4] += (reduce_mask_51 + 1) - 1;
/* now between 2^255 and 2^256-20, and offset by 2^255. */
curve25519_contract_carry_final()
/* serialize: limb n starts at absolute bit 51n = 64n - 13n, so chunk n is
 * (t[n] >> 13n) | (t[n+1] << (51 - 13n)), written as 8 LE bytes */
#define write51full(n,shift) \
f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \
for (i = 0; i < 8; i++, f >>= 8) *out++ = (unsigned char)f;
#define write51(n) write51full(n,13*n)
write51(0)
write51(1)
write51(2)
write51(3)
}
#if !defined(ED25519_GCC_64BIT_CHOOSE)
/* out = (flag) ? in : out */
/* Constant-time 96-byte select: flag must be exactly 0 or 1.  nb = flag-1
 * is all-ones when flag == 0 and all-zeros when flag == 1, so each 64-bit
 * word either keeps out or takes in with no data-dependent branch.
 * NOTE(review): reading uint8_t buffers through uint64_t* assumes 8-byte
 * alignment and violates strict aliasing — confirm callers pass suitably
 * aligned storage and/or the build uses -fno-strict-aliasing. */
DONNA_INLINE static void
curve25519_move_conditional_bytes(uint8_t out[96], const uint8_t in[96], uint64_t flag) {
const uint64_t nb = flag - 1, b = ~nb;
const uint64_t *inq = (const uint64_t *)in;
uint64_t *outq = (uint64_t *)out;
outq[0] = (outq[0] & nb) | (inq[0] & b);
outq[1] = (outq[1] & nb) | (inq[1] & b);
outq[2] = (outq[2] & nb) | (inq[2] & b);
outq[3] = (outq[3] & nb) | (inq[3] & b);
outq[4] = (outq[4] & nb) | (inq[4] & b);
outq[5] = (outq[5] & nb) | (inq[5] & b);
outq[6] = (outq[6] & nb) | (inq[6] & b);
outq[7] = (outq[7] & nb) | (inq[7] & b);
outq[8] = (outq[8] & nb) | (inq[8] & b);
outq[9] = (outq[9] & nb) | (inq[9] & b);
outq[10] = (outq[10] & nb) | (inq[10] & b);
outq[11] = (outq[11] & nb) | (inq[11] & b);
}
/* if (iswap) swap(a, b) — branch-free; iswap must be 0 or 1.
 * mask is all-ones when iswap == 1, so the xor-swap of each limb pair is
 * either applied in full or cancels to a no-op, in constant time. */
DONNA_INLINE static void
curve25519_swap_conditional(bignum25519 a, bignum25519 b, uint64_t iswap) {
	const uint64_t mask = (uint64_t)(-(int64_t)iswap);
	int i;
	for (i = 0; i < 5; i++) {
		const uint64_t t = mask & (a[i] ^ b[i]);
		a[i] ^= t;
		b[i] ^= t;
	}
}
#endif /* ED25519_GCC_64BIT_CHOOSE */
#define ED25519_64BIT_TABLES

File diff suppressed because it is too large Load Diff

View File

@@ -1,513 +0,0 @@
#if defined(ED25519_GCC_32BIT_SSE_CHOOSE)
#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
/*
 * Constant-time lookup of precomputed basepoint multiple |b| from window
 * `pos` of `table`, written into *t, with conditional negation when b < 0.
 *
 * The SSE2 code visits all nine candidates (index 0 = the neutral entry
 * with ysubx = xaddy = 1, t2d = 0, plus the eight 96-byte table entries of
 * the window at &table[pos * 8]): each candidate index is compared against
 * u = |b| with pcmpeqd to build an all-ones/all-zeros mask, then masked in
 * with pand/por — so the memory-access pattern and timing are independent
 * of the secret digit b.  Based on the sign, ysubx and xaddy are swapped
 * (xor-mask swap) and t2d is conditionally negated by subtracting from 2p
 * (constants 0x7ffffda / 0x3fffffe / 0x7fffffe loaded below).  Each field
 * element is repacked from the table's packed byte form into the in-memory
 * limb layout (10 words masked to alternating 26/25 bits, plus 2 zero
 * words — presumably matching ge25519_niels; confirm against its
 * definition).
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
int32_t breg = (int32_t)b;
/* sign = 1 iff b < 0; u = |b| via branch-free two's-complement abs */
uint32_t sign = (uint32_t)breg >> 31;
uint32_t mask = ~(sign - 1);
uint32_t u = (breg + mask) ^ mask;
__asm__ __volatile__ (
/* ysubx+xaddy */
"movl %0, %%eax ;\n"
"movd %%eax, %%xmm6 ;\n"
"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
"pxor %%xmm0, %%xmm0 ;\n"
"pxor %%xmm1, %%xmm1 ;\n"
"pxor %%xmm2, %%xmm2 ;\n"
"pxor %%xmm3, %%xmm3 ;\n"
/* 0 */
"movl $0, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movl $1, %%ecx ;\n"
"movd %%ecx, %%xmm4 ;\n"
"pxor %%xmm5, %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 1 */
"movl $1, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 0(%1), %%xmm4 ;\n"
"movdqa 16(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 32(%1), %%xmm4 ;\n"
"movdqa 48(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 2 */
"movl $2, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 96(%1), %%xmm4 ;\n"
"movdqa 112(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 128(%1), %%xmm4 ;\n"
"movdqa 144(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 3 */
"movl $3, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 192(%1), %%xmm4 ;\n"
"movdqa 208(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 224(%1), %%xmm4 ;\n"
"movdqa 240(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 4 */
"movl $4, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 288(%1), %%xmm4 ;\n"
"movdqa 304(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 320(%1), %%xmm4 ;\n"
"movdqa 336(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 5 */
"movl $5, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 384(%1), %%xmm4 ;\n"
"movdqa 400(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 416(%1), %%xmm4 ;\n"
"movdqa 432(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 6 */
"movl $6, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 480(%1), %%xmm4 ;\n"
"movdqa 496(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 512(%1), %%xmm4 ;\n"
"movdqa 528(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 7 */
"movl $7, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 576(%1), %%xmm4 ;\n"
"movdqa 592(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 608(%1), %%xmm4 ;\n"
"movdqa 624(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* 8 */
"movl $8, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 672(%1), %%xmm4 ;\n"
"movdqa 688(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm0 ;\n"
"por %%xmm5, %%xmm1 ;\n"
"movdqa 704(%1), %%xmm4 ;\n"
"movdqa 720(%1), %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"por %%xmm4, %%xmm2 ;\n"
"por %%xmm5, %%xmm3 ;\n"
/* conditional swap based on sign */
/* (swap ysubx <-> xaddy when b is negative: xor-mask swap of
   xmm0/xmm1 with xmm2/xmm3 under the sign mask) */
"movl %3, %%ecx ;\n"
"movl %2, %%eax ;\n"
"xorl $1, %%ecx ;\n"
"movd %%ecx, %%xmm6 ;\n"
"pxor %%xmm7, %%xmm7 ;\n"
"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
"pxor %%xmm0, %%xmm2 ;\n"
"pxor %%xmm1, %%xmm3 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa %%xmm2, %%xmm4 ;\n"
"movdqa %%xmm3, %%xmm5 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"pand %%xmm7, %%xmm5 ;\n"
"pxor %%xmm4, %%xmm0 ;\n"
"pxor %%xmm5, %%xmm1 ;\n"
"pxor %%xmm0, %%xmm2 ;\n"
"pxor %%xmm1, %%xmm3 ;\n"
/* store ysubx */
/* (repack into 10 words masked to alternating 26/25 bits,
   followed by two zero words) */
"movd %%xmm0, %%ecx ;\n"
"movl %%ecx, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 0(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $26, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 4(%%eax) ;\n"
"movd %%xmm0, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $19, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 8(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"shrdl $13, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 12(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrl $6, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 16(%%eax) ;\n"
"movl %%edx, %%ecx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 20(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $25, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 24(%%eax) ;\n"
"movd %%xmm1, %%ecx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $19, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 28(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"shrdl $12, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 32(%%eax) ;\n"
"shrl $6, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"xorl %%ecx, %%ecx ;\n"
"movl %%edx, 36(%%eax) ;\n"
"movl %%ecx, 40(%%eax) ;\n"
"movl %%ecx, 44(%%eax) ;\n"
/* store xaddy */
"addl $48, %%eax ;\n"
"movdqa %%xmm2, %%xmm0 ;\n"
"movdqa %%xmm3, %%xmm1 ;\n"
"movd %%xmm0, %%ecx ;\n"
"movl %%ecx, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 0(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $26, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 4(%%eax) ;\n"
"movd %%xmm0, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $19, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 8(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"shrdl $13, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 12(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrl $6, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 16(%%eax) ;\n"
"movl %%edx, %%ecx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 20(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $25, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 24(%%eax) ;\n"
"movd %%xmm1, %%ecx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $19, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 28(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"shrdl $12, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 32(%%eax) ;\n"
"shrl $6, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"xorl %%ecx, %%ecx ;\n"
"movl %%edx, 36(%%eax) ;\n"
"movl %%ecx, 40(%%eax) ;\n"
"movl %%ecx, 44(%%eax) ;\n"
/* t2d */
"movl %0, %%eax ;\n"
"movd %%eax, %%xmm6 ;\n"
"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
"pxor %%xmm0, %%xmm0 ;\n"
"pxor %%xmm1, %%xmm1 ;\n"
/* 0 */
"movl $0, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"pxor %%xmm0, %%xmm0 ;\n"
"pxor %%xmm1, %%xmm1 ;\n"
/* 1 */
"movl $1, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 64(%1), %%xmm3 ;\n"
"movdqa 80(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 2 */
"movl $2, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 160(%1), %%xmm3 ;\n"
"movdqa 176(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 3 */
"movl $3, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 256(%1), %%xmm3 ;\n"
"movdqa 272(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 4 */
"movl $4, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 352(%1), %%xmm3 ;\n"
"movdqa 368(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 5 */
"movl $5, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 448(%1), %%xmm3 ;\n"
"movdqa 464(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 6 */
"movl $6, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 544(%1), %%xmm3 ;\n"
"movdqa 560(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 7 */
"movl $7, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 640(%1), %%xmm3 ;\n"
"movdqa 656(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* 8 */
"movl $8, %%eax ;\n"
"movd %%eax, %%xmm7 ;\n"
"pshufd $0x00, %%xmm7, %%xmm7 ;\n"
"pcmpeqd %%xmm6, %%xmm7 ;\n"
"movdqa 736(%1), %%xmm3 ;\n"
"movdqa 752(%1), %%xmm4 ;\n"
"pand %%xmm7, %%xmm3 ;\n"
"pand %%xmm7, %%xmm4 ;\n"
"por %%xmm3, %%xmm0 ;\n"
"por %%xmm4, %%xmm1 ;\n"
/* store t2d */
"movl %2, %%eax ;\n"
"addl $96, %%eax ;\n"
"movd %%xmm0, %%ecx ;\n"
"movl %%ecx, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 0(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $26, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 4(%%eax) ;\n"
"movd %%xmm0, %%edx ;\n"
"pshufd $0x39, %%xmm0, %%xmm0 ;\n"
"shrdl $19, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 8(%%eax) ;\n"
"movd %%xmm0, %%ecx ;\n"
"shrdl $13, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 12(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrl $6, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 16(%%eax) ;\n"
"movl %%edx, %%ecx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 20(%%eax) ;\n"
"movd %%xmm1, %%edx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $25, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 24(%%eax) ;\n"
"movd %%xmm1, %%ecx ;\n"
"pshufd $0x39, %%xmm1, %%xmm1 ;\n"
"shrdl $19, %%ecx, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"movl %%edx, 28(%%eax) ;\n"
/* NOTE(review): the next movd is duplicated (identical src/dst);
   redundant but harmless — confirm against upstream ed25519-donna
   before removing. */
"movd %%xmm1, %%edx ;\n"
"movd %%xmm1, %%edx ;\n"
"shrdl $12, %%edx, %%ecx ;\n"
"andl $0x3ffffff, %%ecx ;\n"
"movl %%ecx, 32(%%eax) ;\n"
"shrl $6, %%edx ;\n"
"andl $0x1ffffff, %%edx ;\n"
"xorl %%ecx, %%ecx ;\n"
"movl %%edx, 36(%%eax) ;\n"
"movl %%ecx, 40(%%eax) ;\n"
"movl %%ecx, 44(%%eax) ;\n"
"movdqa 0(%%eax), %%xmm0 ;\n"
"movdqa 16(%%eax), %%xmm1 ;\n"
"movdqa 32(%%eax), %%xmm2 ;\n"
/* conditionally negate t2d */
/* set up 2p in to 3/4 */
"movl $0x7ffffda, %%ecx ;\n"
"movl $0x3fffffe, %%edx ;\n"
"movd %%ecx, %%xmm3 ;\n"
"movd %%edx, %%xmm5 ;\n"
"movl $0x7fffffe, %%ecx ;\n"
"movd %%ecx, %%xmm4 ;\n"
"punpckldq %%xmm5, %%xmm3 ;\n"
"punpckldq %%xmm5, %%xmm4 ;\n"
"punpcklqdq %%xmm4, %%xmm3 ;\n"
"movdqa %%xmm4, %%xmm5 ;\n"
"punpcklqdq %%xmm4, %%xmm4 ;\n"
/* subtract and conditionally move */
"movl %3, %%ecx ;\n"
"sub $1, %%ecx ;\n"
"movd %%ecx, %%xmm6 ;\n"
"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
"movdqa %%xmm6, %%xmm7 ;\n"
"psubd %%xmm0, %%xmm3 ;\n"
"psubd %%xmm1, %%xmm4 ;\n"
"psubd %%xmm2, %%xmm5 ;\n"
"pand %%xmm6, %%xmm0 ;\n"
"pand %%xmm6, %%xmm1 ;\n"
"pand %%xmm6, %%xmm2 ;\n"
"pandn %%xmm3, %%xmm6 ;\n"
"movdqa %%xmm7, %%xmm3 ;\n"
"pandn %%xmm4, %%xmm7 ;\n"
"pandn %%xmm5, %%xmm3 ;\n"
"por %%xmm6, %%xmm0 ;\n"
"por %%xmm7, %%xmm1 ;\n"
"por %%xmm3, %%xmm2 ;\n"
/* store */
"movdqa %%xmm0, 0(%%eax) ;\n"
"movdqa %%xmm1, 16(%%eax) ;\n"
"movdqa %%xmm2, 32(%%eax) ;\n"
:
: "m"(u), "r"(&table[pos * 8]), "m"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
: "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
);
}
#endif /* defined(ED25519_GCC_32BIT_SSE_CHOOSE) */

View File

@@ -1,436 +0,0 @@
#if defined(ED25519_GCC_64BIT_SSE_CHOOSE)
#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
/*
 * Select the |b|-th precomputed niels point from table[pos] into t, and
 * negate it when b is negative.
 *
 * The lookup is data-independent: all eight table entries (plus the
 * implicit identity for |b| == 0, where ysubx = xaddy = 1 and t2d = 0)
 * are loaded and combined through pcmpeqd/pand/por masks, so the memory
 * access pattern never depends on the secret digit b.  Negation is done
 * by conditionally swapping ysubx/xaddy and replacing t2d with 2p - t2d
 * (the "set up 2p" and "subtract and conditionally move" sections),
 * keyed off `sign`.  The packed 64-bit limbs are unpacked with
 * shrdq/andl into alternating 26-/25-bit words before being stored.
 *
 * t     output niels point (ysubx at +0, xaddy at +48, t2d at +96).
 * table precomputed points, 8 consecutive 96-byte entries per window.
 * pos   window index (selects the group of 8 entries).
 * b     signed window digit; presumably in [-8, 8] -- confirm in caller.
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	int64_t breg = (int64_t)b;
	uint64_t sign = (uint64_t)breg >> 63; /* 1 iff b < 0 */
	uint64_t mask = ~(sign - 1);          /* all-ones iff b < 0 */
	uint64_t u = (breg + mask) ^ mask;    /* u = |b| (branch-free abs) */
	__asm__ __volatile__ (
		/* ysubx+xaddy+t2d */
		"movq %0, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"
		"pxor %%xmm2, %%xmm2 ;\n"
		"pxor %%xmm3, %%xmm3 ;\n"
		"pxor %%xmm4, %%xmm4 ;\n"
		"pxor %%xmm5, %%xmm5 ;\n"
		/* 0 */
		"movq $0, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm6 ;\n"
		"pxor %%xmm7, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm6, %%xmm2 ;\n"
		"por %%xmm7, %%xmm3 ;\n"
		/* 1 */
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 0(%1), %%xmm6 ;\n"
		"movdqa 16(%1), %%xmm7 ;\n"
		"movdqa 32(%1), %%xmm8 ;\n"
		"movdqa 48(%1), %%xmm9 ;\n"
		"movdqa 64(%1), %%xmm10 ;\n"
		"movdqa 80(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 2 */
		"movq $2, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 96(%1), %%xmm6 ;\n"
		"movdqa 112(%1), %%xmm7 ;\n"
		"movdqa 128(%1), %%xmm8 ;\n"
		"movdqa 144(%1), %%xmm9 ;\n"
		"movdqa 160(%1), %%xmm10 ;\n"
		"movdqa 176(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 3 */
		"movq $3, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 192(%1), %%xmm6 ;\n"
		"movdqa 208(%1), %%xmm7 ;\n"
		"movdqa 224(%1), %%xmm8 ;\n"
		"movdqa 240(%1), %%xmm9 ;\n"
		"movdqa 256(%1), %%xmm10 ;\n"
		"movdqa 272(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 4 */
		"movq $4, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 288(%1), %%xmm6 ;\n"
		"movdqa 304(%1), %%xmm7 ;\n"
		"movdqa 320(%1), %%xmm8 ;\n"
		"movdqa 336(%1), %%xmm9 ;\n"
		"movdqa 352(%1), %%xmm10 ;\n"
		"movdqa 368(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 5 */
		"movq $5, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 384(%1), %%xmm6 ;\n"
		"movdqa 400(%1), %%xmm7 ;\n"
		"movdqa 416(%1), %%xmm8 ;\n"
		"movdqa 432(%1), %%xmm9 ;\n"
		"movdqa 448(%1), %%xmm10 ;\n"
		"movdqa 464(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 6 */
		"movq $6, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 480(%1), %%xmm6 ;\n"
		"movdqa 496(%1), %%xmm7 ;\n"
		"movdqa 512(%1), %%xmm8 ;\n"
		"movdqa 528(%1), %%xmm9 ;\n"
		"movdqa 544(%1), %%xmm10 ;\n"
		"movdqa 560(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 7 */
		"movq $7, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 576(%1), %%xmm6 ;\n"
		"movdqa 592(%1), %%xmm7 ;\n"
		"movdqa 608(%1), %%xmm8 ;\n"
		"movdqa 624(%1), %%xmm9 ;\n"
		"movdqa 640(%1), %%xmm10 ;\n"
		"movdqa 656(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 8 */
		"movq $8, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 672(%1), %%xmm6 ;\n"
		"movdqa 688(%1), %%xmm7 ;\n"
		"movdqa 704(%1), %%xmm8 ;\n"
		"movdqa 720(%1), %%xmm9 ;\n"
		"movdqa 736(%1), %%xmm10 ;\n"
		"movdqa 752(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* conditionally swap ysubx and xaddy */
		"movq %3, %%rax ;\n"
		"xorq $1, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pxor %%xmm15, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa %%xmm2, %%xmm6 ;\n"
		"movdqa %%xmm3, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pxor %%xmm6, %%xmm0 ;\n"
		"pxor %%xmm7, %%xmm1 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		/* store ysubx */
		"xorq %%rax, %%rax ;\n"
		"movd %%xmm0, %%rcx ;\n"
		"movd %%xmm0, %%r8 ;\n"
		"movd %%xmm1, %%rsi ;\n"
		"pshufd $0xee, %%xmm0, %%xmm0 ;\n"
		"pshufd $0xee, %%xmm1, %%xmm1 ;\n"
		"movd %%xmm0, %%rdx ;\n"
		"movd %%xmm1, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movl %%ecx, 0(%2) ;\n"
		"movl %%r9d, 4(%2) ;\n"
		"movl %%r8d, 8(%2) ;\n"
		"movl %%r10d, 12(%2) ;\n"
		"movl %%edx, 16(%2) ;\n"
		"movl %%r11d, 20(%2) ;\n"
		"movl %%esi, 24(%2) ;\n"
		"movl %%r12d, 28(%2) ;\n"
		"movl %%edi, 32(%2) ;\n"
		"movl %%r13d, 36(%2) ;\n"
		"movq %%rax, 40(%2) ;\n"
		/* store xaddy */
		"movd %%xmm2, %%rcx ;\n"
		"movd %%xmm2, %%r8 ;\n"
		"movd %%xmm3, %%rsi ;\n"
		"pshufd $0xee, %%xmm2, %%xmm2 ;\n"
		"pshufd $0xee, %%xmm3, %%xmm3 ;\n"
		"movd %%xmm2, %%rdx ;\n"
		"movd %%xmm3, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movl %%ecx, 48(%2) ;\n"
		"movl %%r9d, 52(%2) ;\n"
		"movl %%r8d, 56(%2) ;\n"
		"movl %%r10d, 60(%2) ;\n"
		"movl %%edx, 64(%2) ;\n"
		"movl %%r11d, 68(%2) ;\n"
		"movl %%esi, 72(%2) ;\n"
		"movl %%r12d, 76(%2) ;\n"
		"movl %%edi, 80(%2) ;\n"
		"movl %%r13d, 84(%2) ;\n"
		"movq %%rax, 88(%2) ;\n"
		/* extract t2d */
		"xorq %%rax, %%rax ;\n"
		"movd %%xmm4, %%rcx ;\n"
		"movd %%xmm4, %%r8 ;\n"
		"movd %%xmm5, %%rsi ;\n"
		"pshufd $0xee, %%xmm4, %%xmm4 ;\n"
		"pshufd $0xee, %%xmm5, %%xmm5 ;\n"
		"movd %%xmm4, %%rdx ;\n"
		"movd %%xmm5, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movd %%ecx, %%xmm0 ;\n"
		"movd %%r9d, %%xmm4 ;\n"
		"movd %%r8d, %%xmm8 ;\n"
		"movd %%r10d, %%xmm3 ;\n"
		"movd %%edx, %%xmm1 ;\n"
		"movd %%r11d, %%xmm5 ;\n"
		"movd %%esi, %%xmm6 ;\n"
		"movd %%r12d, %%xmm7 ;\n"
		"movd %%edi, %%xmm2 ;\n"
		"movd %%r13d, %%xmm9 ;\n"
		"punpckldq %%xmm4, %%xmm0 ;\n"
		"punpckldq %%xmm3, %%xmm8 ;\n"
		"punpckldq %%xmm5, %%xmm1 ;\n"
		"punpckldq %%xmm7, %%xmm6 ;\n"
		"punpckldq %%xmm9, %%xmm2 ;\n"
		"punpcklqdq %%xmm8, %%xmm0 ;\n"
		"punpcklqdq %%xmm6, %%xmm1 ;\n"
		/* set up 2p in to 3/4 */
		"movl $0x7ffffda, %%ecx ;\n"
		"movl $0x3fffffe, %%edx ;\n"
		"movl $0x7fffffe, %%eax ;\n"
		"movd %%ecx, %%xmm3 ;\n"
		"movd %%edx, %%xmm5 ;\n"
		"movd %%eax, %%xmm4 ;\n"
		"punpckldq %%xmm5, %%xmm3 ;\n"
		"punpckldq %%xmm5, %%xmm4 ;\n"
		"punpcklqdq %%xmm4, %%xmm3 ;\n"
		"movdqa %%xmm4, %%xmm5 ;\n"
		"punpcklqdq %%xmm4, %%xmm4 ;\n"
		/* subtract and conditionally move */
		"movl %3, %%ecx ;\n"
		"sub $1, %%ecx ;\n"
		"movd %%ecx, %%xmm6 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"movdqa %%xmm6, %%xmm7 ;\n"
		"psubd %%xmm0, %%xmm3 ;\n"
		"psubd %%xmm1, %%xmm4 ;\n"
		"psubd %%xmm2, %%xmm5 ;\n"
		"pand %%xmm6, %%xmm0 ;\n"
		"pand %%xmm6, %%xmm1 ;\n"
		"pand %%xmm6, %%xmm2 ;\n"
		"pandn %%xmm3, %%xmm6 ;\n"
		"movdqa %%xmm7, %%xmm3 ;\n"
		"pandn %%xmm4, %%xmm7 ;\n"
		"pandn %%xmm5, %%xmm3 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm3, %%xmm2 ;\n"
		/* store t2d */
		"movdqa %%xmm0, 96(%2) ;\n"
		"movdqa %%xmm1, 112(%2) ;\n"
		"movdqa %%xmm2, 128(%2) ;\n"
		:
		: "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
		:
			"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
			/* fix: the clobber list named "%xmm14" twice and omitted %xmm15,
			   which this asm writes throughout (movd/pcmpeqd/pxor); an
			   unlisted clobber lets the compiler assume %xmm15 survives. */
			"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm15",
			"cc", "memory"
	);
}
#endif /* defined(ED25519_GCC_64BIT_SSE_CHOOSE) */

View File

@ -1,53 +0,0 @@
/* Ed25519 basepoint as four field elements, each five 51-bit limbs
 * (little-endian limb order).  The third element is 1 -- consistent with
 * an affine point promoted to projective/extended form; presumably the
 * fields are (x, y, z, t) -- confirm against the ge25519 declaration. */
const ge25519 ge25519_basepoint = {
{0x00062d608f25d51a,0x000412a4b4f6592a,0x00075b7171a4b31d,0x0001ff60527118fe,0x000216936d3cd6e5},
{0x0006666666666658,0x0004cccccccccccc,0x0001999999999999,0x0003333333333333,0x0006666666666666},
{0x0000000000000001,0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000},
{0x00068ab3a5b7dda3,0x00000eea2a5eadbb,0x0002af8df483c27e,0x000332b375274732,0x00067875f0fd78b7}
};
/* Field element in five 51-bit limbs; per the name, the Edwards curve
 * constant d -- TODO(review): confirm against upstream ed25519-donna. */
const bignum25519 ge25519_ecd = {
0x00034dca135978a3,0x0001a8283b156ebd,0x0005e7a26001c029,0x000739c663a03cbb,0x00052036cee2b6ff
};
/* Field element in five 51-bit limbs; per the name, 2*d (twice the curve
 * constant) -- TODO(review): confirm against upstream ed25519-donna. */
const bignum25519 ge25519_ec2d = {
0x00069b9426b2f159,0x00035050762add7a,0x0003cf44c0038052,0x0006738cc7407977,0x0002406d9dc56dff
};
/* Field element in five 51-bit limbs; per the name, a square root of -1
 * mod 2^255-19 -- TODO(review): confirm against upstream ed25519-donna. */
const bignum25519 ge25519_sqrtneg1 = {
0x00061b274a0ea0b0,0x0000d5a5fc8f189d,0x0007ef5e9cbd0c60,0x00078595a6804c9e,0x0002b8324804fc1d
};
/* 32 precomputed points in niels form, each entry three field elements of
 * five 51-bit limbs.  The name indicates these are multiples of the
 * basepoint used by a sliding-window scalar multiply; presumably the odd
 * multiples 1B, 3B, ..., 63B with fields (ysubx, xaddy, t2d) -- confirm
 * against the ge25519_niels declaration and upstream ed25519-donna. */
const ge25519_niels ge25519_niels_sliding_multiples[32] = {
{{0x00003905d740913e,0x0000ba2817d673a2,0x00023e2827f4e67c,0x000133d2e0c21a34,0x00044fd2f9298f81},{0x000493c6f58c3b85,0x0000df7181c325f7,0x0000f50b0b3e4cb7,0x0005329385a44c32,0x00007cf9d3a33d4b},{0x00011205877aaa68,0x000479955893d579,0x00050d66309b67a0,0x0002d42d0dbee5ee,0x0006f117b689f0c6}},
{{0x00011fe8a4fcd265,0x0007bcb8374faacc,0x00052f5af4ef4d4f,0x0005314098f98d10,0x0002ab91587555bd},{0x0005b0a84cee9730,0x00061d10c97155e4,0x0004059cc8096a10,0x00047a608da8014f,0x0007a164e1b9a80f},{0x0006933f0dd0d889,0x00044386bb4c4295,0x0003cb6d3162508c,0x00026368b872a2c6,0x0005a2826af12b9b}},
{{0x000182c3a447d6ba,0x00022964e536eff2,0x000192821f540053,0x0002f9f19e788e5c,0x000154a7e73eb1b5},{0x0002bc4408a5bb33,0x000078ebdda05442,0x0002ffb112354123,0x000375ee8df5862d,0x0002945ccf146e20},{0x0003dbf1812a8285,0x0000fa17ba3f9797,0x0006f69cb49c3820,0x00034d5a0db3858d,0x00043aabe696b3bb}},
{{0x00072c9aaa3221b1,0x000267774474f74d,0x000064b0e9b28085,0x0003f04ef53b27c9,0x0001d6edd5d2e531},{0x00025cd0944ea3bf,0x00075673b81a4d63,0x000150b925d1c0d4,0x00013f38d9294114,0x000461bea69283c9},{0x00036dc801b8b3a2,0x0000e0a7d4935e30,0x0001deb7cecc0d7d,0x000053a94e20dd2c,0x0007a9fbb1c6a0f9}},
{{0x0006217e039d8064,0x0006dea408337e6d,0x00057ac112628206,0x000647cb65e30473,0x00049c05a51fadc9},{0x0006678aa6a8632f,0x0005ea3788d8b365,0x00021bd6d6994279,0x0007ace75919e4e3,0x00034b9ed338add7},{0x0004e8bf9045af1b,0x000514e33a45e0d6,0x0007533c5b8bfe0f,0x000583557b7e14c9,0x00073c172021b008}},
{{0x00075b0249864348,0x00052ee11070262b,0x000237ae54fb5acd,0x0003bfd1d03aaab5,0x00018ab598029d5c},{0x000700848a802ade,0x0001e04605c4e5f7,0x0005c0d01b9767fb,0x0007d7889f42388b,0x0004275aae2546d8},{0x00032cc5fd6089e9,0x000426505c949b05,0x00046a18880c7ad2,0x0004a4221888ccda,0x0003dc65522b53df}},
{{0x0007013b327fbf93,0x0001336eeded6a0d,0x0002b565a2bbf3af,0x000253ce89591955,0x0000267882d17602},{0x0000c222a2007f6d,0x000356b79bdb77ee,0x00041ee81efe12ce,0x000120a9bd07097d,0x000234fd7eec346f},{0x0000a119732ea378,0x00063bf1ba8e2a6c,0x00069f94cc90df9a,0x000431d1779bfc48,0x000497ba6fdaa097}},
{{0x0003cd86468ccf0b,0x00048553221ac081,0x0006c9464b4e0a6e,0x00075fba84180403,0x00043b5cd4218d05},{0x0006cc0313cfeaa0,0x0001a313848da499,0x0007cb534219230a,0x00039596dedefd60,0x00061e22917f12de},{0x0002762f9bd0b516,0x0001c6e7fbddcbb3,0x00075909c3ace2bd,0x00042101972d3ec9,0x000511d61210ae4d}},
{{0x000386484420de87,0x0002d6b25db68102,0x000650b4962873c0,0x0004081cfd271394,0x00071a7fe6fe2482},{0x000676ef950e9d81,0x0001b81ae089f258,0x00063c4922951883,0x0002f1d54d9b3237,0x0006d325924ddb85},{0x000182b8a5c8c854,0x00073fcbe5406d8e,0x0005de3430cff451,0x000554b967ac8c41,0x0004746c4b6559ee}},
{{0x000546c864741147,0x0003a1df99092690,0x0001ca8cc9f4d6bb,0x00036b7fc9cd3b03,0x000219663497db5e},{0x00077b3c6dc69a2b,0x0004edf13ec2fa6e,0x0004e85ad77beac8,0x0007dba2b28e7bda,0x0005c9a51de34fe9},{0x0000f1cf79f10e67,0x00043ccb0a2b7ea2,0x00005089dfff776a,0x0001dd84e1d38b88,0x0004804503c60822}},
{{0x000021d23a36d175,0x0004fd3373c6476d,0x00020e291eeed02a,0x00062f2ecf2e7210,0x000771e098858de4},{0x00049ed02ca37fc7,0x000474c2b5957884,0x0005b8388e816683,0x0004b6c454b76be4,0x000553398a516506},{0x0002f5d278451edf,0x000730b133997342,0x0006965420eb6975,0x000308a3bfa516cf,0x0005a5ed1d68ff5a}},
{{0x0005e0c558527359,0x0003395b73afd75c,0x000072afa4e4b970,0x00062214329e0f6d,0x000019b60135fefd},{0x0005122afe150e83,0x0004afc966bb0232,0x0001c478833c8268,0x00017839c3fc148f,0x00044acb897d8bf9},{0x000068145e134b83,0x0001e4860982c3cc,0x000068fb5f13d799,0x0007c9283744547e,0x000150c49fde6ad2}},
{{0x0001863c9cdca868,0x0003770e295a1709,0x0000d85a3720fd13,0x0005e0ff1f71ab06,0x00078a6d7791e05f},{0x0003f29509471138,0x000729eeb4ca31cf,0x00069c22b575bfbc,0x0004910857bce212,0x0006b2b5a075bb99},{0x0007704b47a0b976,0x0002ae82e91aab17,0x00050bd6429806cd,0x00068055158fd8ea,0x000725c7ffc4ad55}},
{{0x00002bf71cd098c0,0x00049dabcc6cd230,0x00040a6533f905b2,0x000573efac2eb8a4,0x0004cd54625f855f},{0x00026715d1cf99b2,0x0002205441a69c88,0x000448427dcd4b54,0x0001d191e88abdc5,0x000794cc9277cb1f},{0x0006c426c2ac5053,0x0005a65ece4b095e,0x0000c44086f26bb6,0x0007429568197885,0x0007008357b6fcc8}},
{{0x00039fbb82584a34,0x00047a568f257a03,0x00014d88091ead91,0x0002145b18b1ce24,0x00013a92a3669d6d},{0x0000672738773f01,0x000752bf799f6171,0x0006b4a6dae33323,0x0007b54696ead1dc,0x00006ef7e9851ad0},{0x0003771cc0577de5,0x0003ca06bb8b9952,0x00000b81c5d50390,0x00043512340780ec,0x0003c296ddf8a2af}},
{{0x00034d2ebb1f2541,0x0000e815b723ff9d,0x000286b416e25443,0x0000bdfe38d1bee8,0x0000a892c7007477},{0x000515f9d914a713,0x00073191ff2255d5,0x00054f5cc2a4bdef,0x0003dd57fc118bcf,0x0007a99d393490c7},{0x0002ed2436bda3e8,0x00002afd00f291ea,0x0000be7381dea321,0x0003e952d4b2b193,0x000286762d28302f}},
{{0x00058e2bce2ef5bd,0x00068ce8f78c6f8a,0x0006ee26e39261b2,0x00033d0aa50bcf9d,0x0007686f2a3d6f17},{0x000036093ce35b25,0x0003b64d7552e9cf,0x00071ee0fe0b8460,0x00069d0660c969e5,0x00032f1da046a9d9},{0x000512a66d597c6a,0x0000609a70a57551,0x000026c08a3c464c,0x0004531fc8ee39e1,0x000561305f8a9ad2}},
{{0x0002cc28e7b0c0d5,0x00077b60eb8a6ce4,0x0004042985c277a6,0x000636657b46d3eb,0x000030a1aef2c57c},{0x0004978dec92aed1,0x000069adae7ca201,0x00011ee923290f55,0x00069641898d916c,0x00000aaec53e35d4},{0x0001f773003ad2aa,0x000005642cc10f76,0x00003b48f82cfca6,0x0002403c10ee4329,0x00020be9c1c24065}},
{{0x0000e44ae2025e60,0x0005f97b9727041c,0x0005683472c0ecec,0x000188882eb1ce7c,0x00069764c545067e},{0x000387d8249673a6,0x0005bea8dc927c2a,0x0005bd8ed5650ef0,0x0000ef0e3fcd40e1,0x000750ab3361f0ac},{0x00023283a2f81037,0x000477aff97e23d1,0x0000b8958dbcbb68,0x0000205b97e8add6,0x00054f96b3fb7075}},
{{0x0005afc616b11ecd,0x00039f4aec8f22ef,0x0003b39e1625d92e,0x0005f85bd4508873,0x00078e6839fbe85d},{0x0005f20429669279,0x00008fafae4941f5,0x00015d83c4eb7688,0x0001cf379eca4146,0x0003d7fe9c52bb75},{0x00032df737b8856b,0x0000608342f14e06,0x0003967889d74175,0x0001211907fba550,0x00070f268f350088}},
{{0x0004112070dcf355,0x0007dcff9c22e464,0x00054ada60e03325,0x00025cd98eef769a,0x000404e56c039b8c},{0x00064583b1805f47,0x00022c1baf832cd0,0x000132c01bd4d717,0x0004ecf4c3a75b8f,0x0007c0d345cfad88},{0x00071f4b8c78338a,0x00062cfc16bc2b23,0x00017cf51280d9aa,0x0003bbae5e20a95a,0x00020d754762aaec}},
{{0x0004feb135b9f543,0x00063bd192ad93ae,0x00044e2ea612cdf7,0x000670f4991583ab,0x00038b8ada8790b4},{0x0007c36fc73bb758,0x0004a6c797734bd1,0x0000ef248ab3950e,0x00063154c9a53ec8,0x0002b8f1e46f3cee},{0x00004a9cdf51f95d,0x0005d963fbd596b8,0x00022d9b68ace54a,0x0004a98e8836c599,0x000049aeb32ceba1}},
{{0x00067d3c63dcfe7e,0x000112f0adc81aee,0x00053df04c827165,0x0002fe5b33b430f0,0x00051c665e0c8d62},{0x00007d0b75fc7931,0x00016f4ce4ba754a,0x0005ace4c03fbe49,0x00027e0ec12a159c,0x000795ee17530f67},{0x00025b0a52ecbd81,0x0005dc0695fce4a9,0x0003b928c575047d,0x00023bf3512686e5,0x0006cd19bf49dc54}},
{{0x0007619052179ca3,0x0000c16593f0afd0,0x000265c4795c7428,0x00031c40515d5442,0x0007520f3db40b2e},{0x0006612165afc386,0x0001171aa36203ff,0x0002642ea820a8aa,0x0001f3bb7b313f10,0x0005e01b3a7429e4},{0x00050be3d39357a1,0x0003ab33d294a7b6,0x0004c479ba59edb3,0x0004c30d184d326f,0x00071092c9ccef3c}},
{{0x0000523f0364918c,0x000687f56d638a7b,0x00020796928ad013,0x0005d38405a54f33,0x0000ea15b03d0257},{0x0003d8ac74051dcf,0x00010ab6f543d0ad,0x0005d0f3ac0fda90,0x0005ef1d2573e5e4,0x0004173a5bb7137a},{0x00056e31f0f9218a,0x0005635f88e102f8,0x0002cbc5d969a5b8,0x000533fbc98b347a,0x0005fc565614a4e3}},
{{0x0006570dc46d7ae5,0x00018a9f1b91e26d,0x000436b6183f42ab,0x000550acaa4f8198,0x00062711c414c454},{0x0002e1e67790988e,0x0001e38b9ae44912,0x000648fbb4075654,0x00028df1d840cd72,0x0003214c7409d466},{0x0001827406651770,0x0004d144f286c265,0x00017488f0ee9281,0x00019e6cdb5c760c,0x0005bea94073ecb8}},
{{0x0005bf0912c89be4,0x00062fadcaf38c83,0x00025ec196b3ce2c,0x00077655ff4f017b,0x0003aacd5c148f61},{0x0000ce63f343d2f8,0x0001e0a87d1e368e,0x000045edbc019eea,0x0006979aed28d0d1,0x0004ad0785944f1b},{0x00063b34c3318301,0x0000e0e62d04d0b1,0x000676a233726701,0x00029e9a042d9769,0x0003aff0cb1d9028}},
{{0x0005c7eb3a20405e,0x0005fdb5aad930f8,0x0004a757e63b8c47,0x00028e9492972456,0x000110e7e86f4cd2},{0x0006430bf4c53505,0x000264c3e4507244,0x00074c9f19a39270,0x00073f84f799bc47,0x0002ccf9f732bd99},{0x0000d89ed603f5e4,0x00051e1604018af8,0x0000b8eedc4a2218,0x00051ba98b9384d0,0x00005c557e0b9693}},
{{0x0001ce311fc97e6f,0x0006023f3fb5db1f,0x0007b49775e8fc98,0x0003ad70adbf5045,0x0006e154c178fe98},{0x0006bbb089c20eb0,0x0006df41fb0b9eee,0x00051087ed87e16f,0x000102db5c9fa731,0x000289fef0841861},{0x00016336fed69abf,0x0004f066b929f9ec,0x0004e9ff9e6c5b93,0x00018c89bc4bb2ba,0x0006afbf642a95ca}},
{{0x0000de0c62f5d2c1,0x00049601cf734fb5,0x0006b5c38263f0f6,0x0004623ef5b56d06,0x0000db4b851b9503},{0x00055070f913a8cc,0x000765619eac2bbc,0x0003ab5225f47459,0x00076ced14ab5b48,0x00012c093cedb801},{0x00047f9308b8190f,0x000414235c621f82,0x00031f5ff41a5a76,0x0006736773aab96d,0x00033aa8799c6635}},
{{0x0007f51ebd085cf2,0x00012cfa67e3f5e1,0x0001800cf1e3d46a,0x00054337615ff0a8,0x000233c6f29e8e21},{0x0000f588fc156cb1,0x000363414da4f069,0x0007296ad9b68aea,0x0004d3711316ae43,0x000212cd0c1c8d58},{0x0004d5107f18c781,0x00064a4fd3a51a5e,0x0004f4cd0448bb37,0x000671d38543151e,0x0001db7778911914}},
{{0x000352397c6bc26f,0x00018a7aa0227bbe,0x0005e68cc1ea5f8b,0x0006fe3e3a7a1d5f,0x00031ad97ad26e2a},{0x00014769dd701ab6,0x00028339f1b4b667,0x0004ab214b8ae37b,0x00025f0aefa0b0fe,0x0007ae2ca8a017d2},{0x000017ed0920b962,0x000187e33b53b6fd,0x00055829907a1463,0x000641f248e0a792,0x0001ed1fc53a6622}}
};

View File

@ -1,435 +0,0 @@
#if defined(ED25519_GCC_64BIT_32BIT_CHOOSE)
#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
/*
 * Select the |b|-th precomputed niels point from table[pos] into t, and
 * negate it when b is negative (64-bit SSE scan, 32-bit-limb output).
 *
 * The lookup is data-independent: all eight table entries (plus the
 * implicit identity for |b| == 0, where ysubx = xaddy = 1 and t2d = 0)
 * are loaded and combined through pcmpeqd/pand/por masks, so the memory
 * access pattern never depends on the secret digit b.  Negation is done
 * by conditionally swapping ysubx/xaddy and replacing t2d with 2p - t2d,
 * keyed off `sign`.  The packed 64-bit limbs are unpacked with
 * shrdq/andl into alternating 26-/25-bit words before being stored
 * (ysubx at +0, xaddy at +40, t2d at +80 -- ten 32-bit words each).
 *
 * t     output niels point in 32-bit-limb representation.
 * table precomputed points, 8 consecutive 96-byte entries per window.
 * pos   window index (selects the group of 8 entries).
 * b     signed window digit; presumably in [-8, 8] -- confirm in caller.
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	int64_t breg = (int64_t)b;
	uint64_t sign = (uint64_t)breg >> 63; /* 1 iff b < 0 */
	uint64_t mask = ~(sign - 1);          /* all-ones iff b < 0 */
	uint64_t u = (breg + mask) ^ mask;    /* u = |b| (branch-free abs) */
	__asm__ __volatile__ (
		/* ysubx+xaddy+t2d */
		"movq %0, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"
		"pxor %%xmm2, %%xmm2 ;\n"
		"pxor %%xmm3, %%xmm3 ;\n"
		"pxor %%xmm4, %%xmm4 ;\n"
		"pxor %%xmm5, %%xmm5 ;\n"
		/* 0 */
		"movq $0, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm6 ;\n"
		"pxor %%xmm7, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm6, %%xmm2 ;\n"
		"por %%xmm7, %%xmm3 ;\n"
		/* 1 */
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 0(%1), %%xmm6 ;\n"
		"movdqa 16(%1), %%xmm7 ;\n"
		"movdqa 32(%1), %%xmm8 ;\n"
		"movdqa 48(%1), %%xmm9 ;\n"
		"movdqa 64(%1), %%xmm10 ;\n"
		"movdqa 80(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 2 */
		"movq $2, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 96(%1), %%xmm6 ;\n"
		"movdqa 112(%1), %%xmm7 ;\n"
		"movdqa 128(%1), %%xmm8 ;\n"
		"movdqa 144(%1), %%xmm9 ;\n"
		"movdqa 160(%1), %%xmm10 ;\n"
		"movdqa 176(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 3 */
		"movq $3, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 192(%1), %%xmm6 ;\n"
		"movdqa 208(%1), %%xmm7 ;\n"
		"movdqa 224(%1), %%xmm8 ;\n"
		"movdqa 240(%1), %%xmm9 ;\n"
		"movdqa 256(%1), %%xmm10 ;\n"
		"movdqa 272(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 4 */
		"movq $4, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 288(%1), %%xmm6 ;\n"
		"movdqa 304(%1), %%xmm7 ;\n"
		"movdqa 320(%1), %%xmm8 ;\n"
		"movdqa 336(%1), %%xmm9 ;\n"
		"movdqa 352(%1), %%xmm10 ;\n"
		"movdqa 368(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 5 */
		"movq $5, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 384(%1), %%xmm6 ;\n"
		"movdqa 400(%1), %%xmm7 ;\n"
		"movdqa 416(%1), %%xmm8 ;\n"
		"movdqa 432(%1), %%xmm9 ;\n"
		"movdqa 448(%1), %%xmm10 ;\n"
		"movdqa 464(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 6 */
		"movq $6, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 480(%1), %%xmm6 ;\n"
		"movdqa 496(%1), %%xmm7 ;\n"
		"movdqa 512(%1), %%xmm8 ;\n"
		"movdqa 528(%1), %%xmm9 ;\n"
		"movdqa 544(%1), %%xmm10 ;\n"
		"movdqa 560(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 7 */
		"movq $7, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 576(%1), %%xmm6 ;\n"
		"movdqa 592(%1), %%xmm7 ;\n"
		"movdqa 608(%1), %%xmm8 ;\n"
		"movdqa 624(%1), %%xmm9 ;\n"
		"movdqa 640(%1), %%xmm10 ;\n"
		"movdqa 656(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* 8 */
		"movq $8, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 672(%1), %%xmm6 ;\n"
		"movdqa 688(%1), %%xmm7 ;\n"
		"movdqa 704(%1), %%xmm8 ;\n"
		"movdqa 720(%1), %%xmm9 ;\n"
		"movdqa 736(%1), %%xmm10 ;\n"
		"movdqa 752(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"
		/* conditionally swap ysubx and xaddy */
		"movq %3, %%rax ;\n"
		"xorq $1, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pxor %%xmm15, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa %%xmm2, %%xmm6 ;\n"
		"movdqa %%xmm3, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pxor %%xmm6, %%xmm0 ;\n"
		"pxor %%xmm7, %%xmm1 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		/* store ysubx */
		"xorq %%rax, %%rax ;\n"
		"movd %%xmm0, %%rcx ;\n"
		"movd %%xmm0, %%r8 ;\n"
		"movd %%xmm1, %%rsi ;\n"
		"pshufd $0xee, %%xmm0, %%xmm0 ;\n"
		"pshufd $0xee, %%xmm1, %%xmm1 ;\n"
		"movd %%xmm0, %%rdx ;\n"
		"movd %%xmm1, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movl %%ecx, 0(%2) ;\n"
		"movl %%r9d, 4(%2) ;\n"
		"movl %%r8d, 8(%2) ;\n"
		"movl %%r10d, 12(%2) ;\n"
		"movl %%edx, 16(%2) ;\n"
		"movl %%r11d, 20(%2) ;\n"
		"movl %%esi, 24(%2) ;\n"
		"movl %%r12d, 28(%2) ;\n"
		"movl %%edi, 32(%2) ;\n"
		"movl %%r13d, 36(%2) ;\n"
		/* store xaddy */
		"movd %%xmm2, %%rcx ;\n"
		"movd %%xmm2, %%r8 ;\n"
		"movd %%xmm3, %%rsi ;\n"
		"pshufd $0xee, %%xmm2, %%xmm2 ;\n"
		"pshufd $0xee, %%xmm3, %%xmm3 ;\n"
		"movd %%xmm2, %%rdx ;\n"
		"movd %%xmm3, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movl %%ecx, 40(%2) ;\n"
		"movl %%r9d, 44(%2) ;\n"
		"movl %%r8d, 48(%2) ;\n"
		"movl %%r10d, 52(%2) ;\n"
		"movl %%edx, 56(%2) ;\n"
		"movl %%r11d, 60(%2) ;\n"
		"movl %%esi, 64(%2) ;\n"
		"movl %%r12d, 68(%2) ;\n"
		"movl %%edi, 72(%2) ;\n"
		"movl %%r13d, 76(%2) ;\n"
		/* extract t2d */
		"xorq %%rax, %%rax ;\n"
		"movd %%xmm4, %%rcx ;\n"
		"movd %%xmm4, %%r8 ;\n"
		"movd %%xmm5, %%rsi ;\n"
		"pshufd $0xee, %%xmm4, %%xmm4 ;\n"
		"pshufd $0xee, %%xmm5, %%xmm5 ;\n"
		"movd %%xmm4, %%rdx ;\n"
		"movd %%xmm5, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"movq %%rcx, %%r9 ;\n"
		"movq %%r8, %%r10 ;\n"
		"movq %%rdx, %%r11 ;\n"
		"movq %%rsi, %%r12 ;\n"
		"movq %%rdi, %%r13 ;\n"
		"shrq $26, %%r9 ;\n"
		"shrq $26, %%r10 ;\n"
		"shrq $26, %%r11 ;\n"
		"shrq $26, %%r12 ;\n"
		"shrq $26, %%r13 ;\n"
		"andl $0x3ffffff, %%ecx ;\n"
		"andl $0x1ffffff, %%r9d ;\n"
		"andl $0x3ffffff, %%r8d ;\n"
		"andl $0x1ffffff, %%r10d ;\n"
		"andl $0x3ffffff, %%edx ;\n"
		"andl $0x1ffffff, %%r11d ;\n"
		"andl $0x3ffffff, %%esi ;\n"
		"andl $0x1ffffff, %%r12d ;\n"
		"andl $0x3ffffff, %%edi ;\n"
		"andl $0x1ffffff, %%r13d ;\n"
		"movd %%ecx, %%xmm0 ;\n"
		"movd %%r9d, %%xmm4 ;\n"
		"movd %%r8d, %%xmm8 ;\n"
		"movd %%r10d, %%xmm3 ;\n"
		"movd %%edx, %%xmm1 ;\n"
		"movd %%r11d, %%xmm5 ;\n"
		"movd %%esi, %%xmm6 ;\n"
		"movd %%r12d, %%xmm7 ;\n"
		"movd %%edi, %%xmm2 ;\n"
		"movd %%r13d, %%xmm9 ;\n"
		"punpckldq %%xmm4, %%xmm0 ;\n"
		"punpckldq %%xmm3, %%xmm8 ;\n"
		"punpckldq %%xmm5, %%xmm1 ;\n"
		"punpckldq %%xmm7, %%xmm6 ;\n"
		"punpckldq %%xmm9, %%xmm2 ;\n"
		"punpcklqdq %%xmm8, %%xmm0 ;\n"
		"punpcklqdq %%xmm6, %%xmm1 ;\n"
		/* set up 2p in to 3/4 */
		"movl $0x7ffffda, %%ecx ;\n"
		"movl $0x3fffffe, %%edx ;\n"
		"movl $0x7fffffe, %%eax ;\n"
		"movd %%ecx, %%xmm3 ;\n"
		"movd %%edx, %%xmm5 ;\n"
		"movd %%eax, %%xmm4 ;\n"
		"punpckldq %%xmm5, %%xmm3 ;\n"
		"punpckldq %%xmm5, %%xmm4 ;\n"
		"punpcklqdq %%xmm4, %%xmm3 ;\n"
		"movdqa %%xmm4, %%xmm5 ;\n"
		"punpcklqdq %%xmm4, %%xmm4 ;\n"
		/* subtract and conditionally move */
		"movl %3, %%ecx ;\n"
		"sub $1, %%ecx ;\n"
		"movd %%ecx, %%xmm6 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"movdqa %%xmm6, %%xmm7 ;\n"
		"psubd %%xmm0, %%xmm3 ;\n"
		"psubd %%xmm1, %%xmm4 ;\n"
		"psubd %%xmm2, %%xmm5 ;\n"
		"pand %%xmm6, %%xmm0 ;\n"
		"pand %%xmm6, %%xmm1 ;\n"
		"pand %%xmm6, %%xmm2 ;\n"
		"pandn %%xmm3, %%xmm6 ;\n"
		"movdqa %%xmm7, %%xmm3 ;\n"
		"pandn %%xmm4, %%xmm7 ;\n"
		"pandn %%xmm5, %%xmm3 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm3, %%xmm2 ;\n"
		/* store t2d */
		"movdqa %%xmm0, 80(%2) ;\n"
		"movdqa %%xmm1, 96(%2) ;\n"
		"movd %%xmm2, %%rax ;\n"
		"movq %%rax, 112(%2) ;\n"
		:
		: "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
		:
			"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
			/* fix: the clobber list named "%xmm14" twice and omitted %xmm15,
			   which this asm writes throughout (movd/pcmpeqd/pxor); an
			   unlisted clobber lets the compiler assume %xmm15 survives. */
			"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm15",
			"cc", "memory"
	);
}
#endif /* defined(ED25519_GCC_64BIT_32BIT_CHOOSE) */

View File

@ -1,351 +0,0 @@
#if defined(ED25519_GCC_64BIT_X86_CHOOSE)

#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS

/*
 * Constant-time lookup of a precomputed niels point: loads table[pos][|b|-1]
 * (or the neutral element when b == 0) into *t, with the sign of b applied.
 * All 8 table entries are read and combined under pcmpeqd-generated masks so
 * the memory access pattern is independent of the secret digit b.
 *
 * Output layout: ysubx at t+0, xaddy at t+40, t2d at t+80, each as 5 51-bit
 * limbs re-packed from the 255-bit SSE lanes via the shrdq chains below.
 *
 * Fix: the original clobber list named "%xmm14" twice and omitted "%xmm15",
 * which this asm uses as the comparison-mask register throughout; an
 * unlisted clobber allows the compiler to assume the register is preserved
 * across the asm statement.
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	int64_t breg = (int64_t)b;
	uint64_t sign = (uint64_t)breg >> 63; /* 1 iff b < 0 */
	uint64_t mask = ~(sign - 1);
	uint64_t u = (breg + mask) ^ mask;    /* u = |b| */
	__asm__ __volatile__ (
		/* ysubx+xaddy+t2d */
		"movq %0, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"
		"pxor %%xmm2, %%xmm2 ;\n"
		"pxor %%xmm3, %%xmm3 ;\n"
		"pxor %%xmm4, %%xmm4 ;\n"
		"pxor %%xmm5, %%xmm5 ;\n"

		/* 0 */
		"movq $0, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm6 ;\n"
		"pxor %%xmm7, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm6, %%xmm2 ;\n"
		"por %%xmm7, %%xmm3 ;\n"

		/* 1 */
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 0(%1), %%xmm6 ;\n"
		"movdqa 16(%1), %%xmm7 ;\n"
		"movdqa 32(%1), %%xmm8 ;\n"
		"movdqa 48(%1), %%xmm9 ;\n"
		"movdqa 64(%1), %%xmm10 ;\n"
		"movdqa 80(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 2 */
		"movq $2, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 96(%1), %%xmm6 ;\n"
		"movdqa 112(%1), %%xmm7 ;\n"
		"movdqa 128(%1), %%xmm8 ;\n"
		"movdqa 144(%1), %%xmm9 ;\n"
		"movdqa 160(%1), %%xmm10 ;\n"
		"movdqa 176(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 3 */
		"movq $3, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 192(%1), %%xmm6 ;\n"
		"movdqa 208(%1), %%xmm7 ;\n"
		"movdqa 224(%1), %%xmm8 ;\n"
		"movdqa 240(%1), %%xmm9 ;\n"
		"movdqa 256(%1), %%xmm10 ;\n"
		"movdqa 272(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 4 */
		"movq $4, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 288(%1), %%xmm6 ;\n"
		"movdqa 304(%1), %%xmm7 ;\n"
		"movdqa 320(%1), %%xmm8 ;\n"
		"movdqa 336(%1), %%xmm9 ;\n"
		"movdqa 352(%1), %%xmm10 ;\n"
		"movdqa 368(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 5 */
		"movq $5, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 384(%1), %%xmm6 ;\n"
		"movdqa 400(%1), %%xmm7 ;\n"
		"movdqa 416(%1), %%xmm8 ;\n"
		"movdqa 432(%1), %%xmm9 ;\n"
		"movdqa 448(%1), %%xmm10 ;\n"
		"movdqa 464(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 6 */
		"movq $6, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 480(%1), %%xmm6 ;\n"
		"movdqa 496(%1), %%xmm7 ;\n"
		"movdqa 512(%1), %%xmm8 ;\n"
		"movdqa 528(%1), %%xmm9 ;\n"
		"movdqa 544(%1), %%xmm10 ;\n"
		"movdqa 560(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 7 */
		"movq $7, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 576(%1), %%xmm6 ;\n"
		"movdqa 592(%1), %%xmm7 ;\n"
		"movdqa 608(%1), %%xmm8 ;\n"
		"movdqa 624(%1), %%xmm9 ;\n"
		"movdqa 640(%1), %%xmm10 ;\n"
		"movdqa 656(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 8 */
		"movq $8, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 672(%1), %%xmm6 ;\n"
		"movdqa 688(%1), %%xmm7 ;\n"
		"movdqa 704(%1), %%xmm8 ;\n"
		"movdqa 720(%1), %%xmm9 ;\n"
		"movdqa 736(%1), %%xmm10 ;\n"
		"movdqa 752(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* conditionally swap ysubx and xaddy */
		"movq %3, %%rax ;\n"
		"xorq $1, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pxor %%xmm15, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa %%xmm2, %%xmm6 ;\n"
		"movdqa %%xmm3, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pxor %%xmm6, %%xmm0 ;\n"
		"pxor %%xmm7, %%xmm1 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"

		/* store ysubx */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm0, %%rcx ;\n"
		"movd %%xmm0, %%r8 ;\n"
		"movd %%xmm1, %%rsi ;\n"
		"pshufd $0xee, %%xmm0, %%xmm0 ;\n"
		"pshufd $0xee, %%xmm1, %%xmm1 ;\n"
		"movd %%xmm0, %%rdx ;\n"
		"movd %%xmm1, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"
		"movq %%rcx, 0(%2) ;\n"
		"movq %%r8, 8(%2) ;\n"
		"movq %%rdx, 16(%2) ;\n"
		"movq %%rsi, 24(%2) ;\n"
		"movq %%rdi, 32(%2) ;\n"

		/* store xaddy */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm2, %%rcx ;\n"
		"movd %%xmm2, %%r8 ;\n"
		"movd %%xmm3, %%rsi ;\n"
		"pshufd $0xee, %%xmm2, %%xmm2 ;\n"
		"pshufd $0xee, %%xmm3, %%xmm3 ;\n"
		"movd %%xmm2, %%rdx ;\n"
		"movd %%xmm3, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"
		"movq %%rcx, 40(%2) ;\n"
		"movq %%r8, 48(%2) ;\n"
		"movq %%rdx, 56(%2) ;\n"
		"movq %%rsi, 64(%2) ;\n"
		"movq %%rdi, 72(%2) ;\n"

		/* extract t2d */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm4, %%rcx ;\n"
		"movd %%xmm4, %%r8 ;\n"
		"movd %%xmm5, %%rsi ;\n"
		"pshufd $0xee, %%xmm4, %%xmm4 ;\n"
		"pshufd $0xee, %%xmm5, %%xmm5 ;\n"
		"movd %%xmm4, %%rdx ;\n"
		"movd %%xmm5, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"

		/* conditionally negate t2d (subtract from 2p, cmove on sign) */
		"movq %3, %%rax ;\n"
		"movq $0xfffffffffffda, %%r9 ;\n"
		"movq $0xffffffffffffe, %%r10 ;\n"
		"movq %%r10, %%r11 ;\n"
		"movq %%r10, %%r12 ;\n"
		"movq %%r10, %%r13 ;\n"
		"subq %%rcx, %%r9 ;\n"
		"subq %%r8, %%r10 ;\n"
		"subq %%rdx, %%r11 ;\n"
		"subq %%rsi, %%r12 ;\n"
		"subq %%rdi, %%r13 ;\n"
		"cmpq $1, %%rax ;\n"
		"cmove %%r9, %%rcx ;\n"
		"cmove %%r10, %%r8 ;\n"
		"cmove %%r11, %%rdx ;\n"
		"cmove %%r12, %%rsi ;\n"
		"cmove %%r13, %%rdi ;\n"

		/* store t2d */
		"movq %%rcx, 80(%2) ;\n"
		"movq %%r8, 88(%2) ;\n"
		"movq %%rdx, 96(%2) ;\n"
		"movq %%rsi, 104(%2) ;\n"
		"movq %%rdi, 112(%2) ;\n"
		:
		: "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
		:
			"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
			"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm15",
			"cc", "memory"
	);
}

#endif /* defined(ED25519_GCC_64BIT_X86_CHOOSE) */

View File

@ -1,275 +0,0 @@
/*
Ed25519 batch verification
*/
#define max_batch_size 64
#define heap_batch_size ((max_batch_size * 2) + 1)

/* which limb is the 128th bit in? */
static const size_t limb128bits = (128 + bignum256modm_bits_per_limb - 1) / bignum256modm_bits_per_limb;

typedef size_t heap_index_t;

/* Working state for batch verification: one (scalar, point) slot per
   signature, per randomizer, and one for the basepoint multiple.
   `heap` stores indices into scalars[]/points[], maintained as a max-heap
   keyed on scalar value (see heap_insert_next / heap_updated_root). */
typedef struct batch_heap_t {
	unsigned char r[heap_batch_size][16]; /* 128 bit random values */
	ge25519 points[heap_batch_size];
	bignum256modm scalars[heap_batch_size];
	heap_index_t heap[heap_batch_size];
	size_t size;
} batch_heap;
/* exchange the heap entries stored at positions a and b */
static void
heap_swap(heap_index_t *heap, size_t a, size_t b) {
	heap_index_t tmp = heap[a];
	heap[a] = heap[b];
	heap[b] = tmp;
}
/* Add the scalar at index heap->size to the heap: append it at the bottom
   and sift it up until the max-heap property (parent >= child, compared
   over all limbs) is restored. Increments heap->size. */
static void
heap_insert_next(batch_heap *heap) {
	size_t node = heap->size, parent;
	heap_index_t *pheap = heap->heap;
	bignum256modm *scalars = heap->scalars;

	/* insert at the bottom */
	pheap[node] = (heap_index_t)node;

	/* sift node up to its sorted spot; new scalars have unknown magnitude,
	   so compare using the full limb count */
	parent = (node - 1) / 2;
	while (node && lt256_modm_batch(scalars[pheap[parent]], scalars[pheap[node]], bignum256modm_limb_size - 1)) {
		heap_swap(pheap, parent, node);
		node = parent;
		parent = (node - 1) / 2;
	}
	heap->size++;
}
/* Restore heap order after the root's scalar has been reduced in place:
   sift the root down along the larger-child path to the bottom, then sift
   the node it landed on back up. Comparisons only look at the low
   `limbsize`+1 limbs, since the caller tracks how far the scalars have
   shrunk. */
static void
heap_updated_root(batch_heap *heap, size_t limbsize) {
	size_t node, parent, childr, childl;
	heap_index_t *pheap = heap->heap;
	bignum256modm *scalars = heap->scalars;

	/* sift root to the bottom, always descending into the larger child */
	parent = 0;
	node = 1;
	childl = 1;
	childr = 2;
	while ((childr < heap->size)) {
		node = lt256_modm_batch(scalars[pheap[childl]], scalars[pheap[childr]], limbsize) ? childr : childl;
		heap_swap(pheap, parent, node);
		parent = node;
		childl = (parent * 2) + 1;
		childr = childl + 1;
	}

	/* sift root back up to its sorted spot */
	parent = (node - 1) / 2;
	while (node && lte256_modm_batch(scalars[pheap[parent]], scalars[pheap[node]], limbsize)) {
		heap_swap(pheap, parent, node);
		node = parent;
		parent = (node - 1) / 2;
	}
}
/* build the heap from the first `count` scalars; count must be >= 3 */
static void
heap_build(batch_heap *heap, size_t count) {
	heap->heap[0] = 0;
	/* heap_insert_next bumps heap->size on each call */
	for (heap->size = 0; heap->size < count; )
		heap_insert_next(heap);
}
/* grow the heap until it contains new_count elements */
static void
heap_extend(batch_heap *heap, size_t new_count) {
	for (; heap->size < new_count; )
		heap_insert_next(heap);
}
/* return the two largest entries: *max1 is the root, *max2 the larger of
   the root's two children (compared over the low limbsize+1 limbs) */
static void
heap_get_top2(batch_heap *heap, heap_index_t *max1, heap_index_t *max2, size_t limbsize) {
	heap_index_t root = heap->heap[0];
	heap_index_t left = heap->heap[1];
	heap_index_t right = heap->heap[2];
	*max1 = root;
	*max2 = lt256_modm_batch(heap->scalars[left], heap->scalars[right], limbsize) ? right : left;
}
/* Finish the multi-scalar multiplication once Bos-Coster reduction has left
   a single nonzero scalar: r = [scalar]point, computed with plain variable
   time double-and-add starting at the highest set bit. */
static void
ge25519_multi_scalarmult_vartime_final(ge25519 *r, ge25519 *point, bignum256modm scalar) {
	const bignum256modm_element_t topbit = ((bignum256modm_element_t)1 << (bignum256modm_bits_per_limb - 1));
	size_t limb = limb128bits;
	bignum256modm_element_t flag;

	if (isone256_modm_batch(scalar)) {
		/* this will happen most of the time after bos-carter */
		*r = *point;
		return;
	} else if (iszero256_modm_batch(scalar)) {
		/* this will only happen if all scalars == 0; return the neutral
		   element (0, 1, 1) */
		memset(r, 0, sizeof(*r));
		r->y[0] = 1;
		r->z[0] = 1;
		return;
	}

	*r = *point;

	/* find the limb where first bit is set; scalar is known nonzero and
	   <= 128 bits here, so the scan starts at limb128bits */
	while (!scalar[limb])
		limb--;

	/* find the first bit */
	flag = topbit;
	while ((scalar[limb] & flag) == 0)
		flag >>= 1;

	/* exponentiate: left-to-right double-and-add over the remaining bits */
	for (;;) {
		ge25519_double(r, r);
		if (scalar[limb] & flag)
			ge25519_add(r, r, point);

		flag >>= 1;
		if (!flag) {
			if (!limb--)
				break;
			flag = topbit;
		}
	}
}
/* Bos-Coster multi-scalar multiplication: r = sum([scalars[i]]points[i]).
   Repeatedly replaces the largest scalar s1 with s1 - s2 (s2 = second
   largest) while folding the corresponding point into points[max2], until
   one nonzero scalar remains. Variable time; count must be >= 5. */
static void
ge25519_multi_scalarmult_vartime(ge25519 *r, batch_heap *heap, size_t count) {
	heap_index_t max1, max2;
	/* start with the full limb size */
	size_t limbsize = bignum256modm_limb_size - 1;
	/* whether the heap has been extended to include the 128 bit scalars */
	int extended = 0;

	/* grab an odd number of scalars to build the heap, unknown limb sizes */
	heap_build(heap, ((count + 1) / 2) | 1);
	for (;;) {
		heap_get_top2(heap, &max1, &max2, limbsize);

		/* only one scalar remaining, we're done */
		if (iszero256_modm_batch(heap->scalars[max2]))
			break;

		/* exhausted another limb? shrink the comparison window */
		if (!heap->scalars[max1][limbsize])
			limbsize -= 1;

		/* can we extend to the 128 bit scalars? the randomized r_scalars are
		   only 128 bits, so they are deferred until the heap has shrunk */
		if (!extended && isatmost128bits256_modm_batch(heap->scalars[max1])) {
			heap_extend(heap, count);
			heap_get_top2(heap, &max1, &max2, limbsize);
			extended = 1;
		}

		sub256_modm_batch(heap->scalars[max1], heap->scalars[max1], heap->scalars[max2], limbsize);
		ge25519_add(&heap->points[max2], &heap->points[max2], &heap->points[max1]);
		heap_updated_root(heap, limbsize);
	}

	ge25519_multi_scalarmult_vartime_final(r, &heap->points[max1], heap->scalars[max1]);
}
/* not actually used for anything other than testing */
unsigned char batch_point_buffer[3][32];

/* Returns nonzero iff p is the neutral element, i.e. x == 0 and y == z in
   projective coordinates. Variable time (memcmp); also stashes the packed
   y coordinate in batch_point_buffer for tests. */
static int
ge25519_is_neutral_vartime(const ge25519 *p) {
	static const unsigned char zero[32] = {0};
	unsigned char point_buffer[3][32];
	curve25519_contract(point_buffer[0], p->x);
	curve25519_contract(point_buffer[1], p->y);
	curve25519_contract(point_buffer[2], p->z);
	memcpy(batch_point_buffer[1], point_buffer[1], 32);
	return (memcmp(point_buffer[0], zero, 32) == 0) && (memcmp(point_buffer[1], point_buffer[2], 32) == 0);
}
/* Batch signature verification: checks num signatures at once using a random
   linear combination (random 128-bit weights r[i]), verifying that
   [sum(r_i * s_i)]B + sum([r_i * H(R_i,A_i,m_i)](-A_i)) + sum([r_i](-R_i))
   is the neutral element. On batch failure (or fewer than 4 signatures)
   falls back to verifying each signature individually.
   Returns 0 if all signatures are valid; valid[i] is set to 1/0 per
   signature. Bit 0 of the return marks an invalid signature, bit 2 marks a
   failed batch check. */
int
ED25519_FN(ed25519_sign_open_batch) (const unsigned char **m, size_t *mlen, const unsigned char **pk, const unsigned char **RS, size_t num, int *valid) {
	batch_heap ALIGN(16) batch;
	ge25519 ALIGN(16) p;
	bignum256modm *r_scalars;
	size_t i, batchsize;
	unsigned char hram[64];
	int ret = 0;

	for (i = 0; i < num; i++)
		valid[i] = 1;

	while (num > 3) {
		batchsize = (num > max_batch_size) ? max_batch_size : num;

		/* generate r (scalars[batchsize+1]..scalars[2*batchsize] */
		ED25519_FN(ed25519_randombytes_unsafe) (batch.r, batchsize * 16);
		r_scalars = &batch.scalars[batchsize + 1];
		for (i = 0; i < batchsize; i++)
			expand256_modm(r_scalars[i], batch.r[i], 16);

		/* compute scalars[0] = ((r1s1 + r2s2 + ...)) */
		for (i = 0; i < batchsize; i++) {
			expand256_modm(batch.scalars[i], RS[i] + 32, 32);
			mul256_modm(batch.scalars[i], batch.scalars[i], r_scalars[i]);
		}
		for (i = 1; i < batchsize; i++)
			add256_modm(batch.scalars[0], batch.scalars[0], batch.scalars[i]);

		/* compute scalars[1]..scalars[batchsize] as r[i]*H(R[i],A[i],m[i]) */
		for (i = 0; i < batchsize; i++) {
			ed25519_hram(hram, RS[i], pk[i], m[i], mlen[i]);
			expand256_modm(batch.scalars[i+1], hram, 64);
			mul256_modm(batch.scalars[i+1], batch.scalars[i+1], r_scalars[i]);
		}

		/* compute points: basepoint, then negated public keys, then
		   negated R values; failure to unpack falls back to one-by-one
		   verification */
		batch.points[0] = ge25519_basepoint;
		for (i = 0; i < batchsize; i++)
			if (!ge25519_unpack_negative_vartime(&batch.points[i+1], pk[i]))
				goto fallback;
		for (i = 0; i < batchsize; i++)
			if (!ge25519_unpack_negative_vartime(&batch.points[batchsize+i+1], RS[i]))
				goto fallback;

		ge25519_multi_scalarmult_vartime(&p, &batch, (batchsize * 2) + 1);
		if (!ge25519_is_neutral_vartime(&p)) {
			ret |= 2;

		fallback:
			for (i = 0; i < batchsize; i++) {
				valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
				ret |= (valid[i] ^ 1);
			}
		}

		m += batchsize;
		mlen += batchsize;
		pk += batchsize;
		RS += batchsize;
		num -= batchsize;
		valid += batchsize;
	}

	/* remaining signatures (num <= 3): verify individually */
	for (i = 0; i < num; i++) {
		valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
		ret |= (valid[i] ^ 1);
	}

	return ret;
}

View File

@ -1,519 +0,0 @@
/*
conversions
*/
/* Convert a completed (p1p1) point to partial projective (x, y, z) form,
   leaving r->t unset. The X*T and Z*T products are computed together in one
   packed SSE2 multiply. */
static void
ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
	packed64bignum25519 ALIGN(16) xz, tt, xzout;
	curve25519_mul(r->y, p->y, p->z);
	curve25519_tangle64(xz, p->x, p->z);
	curve25519_tangleone64(tt, p->t);
	curve25519_mul_packed64(xzout, xz, tt);
	curve25519_untangle64(r->x, r->z, xzout);
}
/* Convert a completed (p1p1) point to full extended (x, y, z, t) form.
   X*T, X*Y and Z*T, Z*Y are each computed as one packed SSE2 multiply. */
static void
ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
	packed64bignum25519 ALIGN(16) zy, xt, xx, zz, ty;
	curve25519_tangle64(ty, p->t, p->y);
	curve25519_tangleone64(xx, p->x);
	curve25519_mul_packed64(xt, xx, ty);
	curve25519_untangle64(r->x, r->t, xt);
	curve25519_tangleone64(zz, p->z);
	curve25519_mul_packed64(zy, zz, ty);
	curve25519_untangle64(r->z, r->y, zy);
}
/* Precompute the pniels form (y-x, x+y, z, 2dt) of r for faster repeated
   additions. */
static void
ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
	curve25519_sub(p->ysubx, r->y, r->x);
	curve25519_add(p->xaddy, r->x, r->y);
	curve25519_copy(p->z, r->z);
	curve25519_mul(p->t2d, r->t, ge25519_ec2d);
}
/*
adding & doubling
*/
/* Full point addition in extended coordinates, producing a completed (p1p1)
   result. Independent field products are paired into packed SSE2 multiplies
   (tangle/untangle interleave two bignums into shared registers). */
static void
ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
	bignum25519 ALIGN(16) a,b,c,d;
	packed32bignum25519 ALIGN(16) xx, yy, yypxx, yymxx, bd, ac, bdmac, bdpac;
	packed64bignum25519 ALIGN(16) at, bu, atbu, ptz, qtz, cd;

	/* a = (y1-x1)(y2-x2), b = (y1+x1)(y2+x2), packed */
	curve25519_tangle32(yy, p->y, q->y);
	curve25519_tangle32(xx, p->x, q->x);
	curve25519_add_packed32(yypxx, yy, xx);
	curve25519_sub_packed32(yymxx, yy, xx);
	curve25519_tangle64_from32(at, bu, yymxx, yypxx);
	curve25519_mul_packed64(atbu, at, bu);
	curve25519_untangle64(a, b, atbu);

	/* c = 2d*t1*t2, d = 2*z1*z2, packed */
	curve25519_tangle64(ptz, p->t, p->z);
	curve25519_tangle64(qtz, q->t, q->z);
	curve25519_mul_packed64(cd, ptz, qtz);
	curve25519_untangle64(c, d, cd);
	curve25519_mul(c, c, ge25519_ec2d);
	curve25519_add_reduce(d, d, d); /* reduce, so no after_basic is needed later */

	/* x = b-a, t = b+a, y = d+c, z = d-c */
	curve25519_tangle32(bd, b, d);
	curve25519_tangle32(ac, a, c);
	curve25519_sub_packed32(bdmac, bd, ac);
	curve25519_add_packed32(bdpac, bd, ac);
	curve25519_untangle32(r->x, r->t, bdmac);
	curve25519_untangle32(r->y, r->z, bdpac);
}
/* Point doubling via squarings, producing a completed (p1p1) result.
   The x^2/y^2 and z^2/(x+y)^2 squarings are done as packed SSE2 pairs. */
static void
ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
	bignum25519 ALIGN(16) a,b,c,x;
	packed64bignum25519 ALIGN(16) xy, zx, ab, cx;
	packed32bignum25519 ALIGN(16) xc, yz, xt, yc, ac, bc;

	/* a = x^2, b = y^2, c = 2z^2, x = (x+y)^2 */
	curve25519_add(x, p->x, p->y);
	curve25519_tangle64(xy, p->x, p->y);
	curve25519_square_packed64(ab, xy);
	curve25519_untangle64(a, b, ab);
	curve25519_tangle64(zx, p->z, x);
	curve25519_square_packed64(cx, zx);
	curve25519_untangle64(c, x, cx);

	/* y = b+a, c = 2z^2+a (reduced); z = b-a; x = (x+y)^2-y, t = c-y */
	curve25519_tangle32(bc, b, c);
	curve25519_tangle32(ac, a, c);
	curve25519_add_reduce_packed32(yc, bc, ac);
	curve25519_untangle32(r->y, c, yc);
	curve25519_sub(r->z, b, a);
	curve25519_tangle32(yz, r->y, r->z);
	curve25519_tangle32(xc, x, c);
	curve25519_sub_after_basic_packed32(xt, xc, yz);
	curve25519_untangle32(r->x, r->t, xt);
}
/* Mixed addition p + q (or p - q when signbit == 1) with a precomputed
   niels point (ysubx, xaddy, t2d), producing a completed result.
   NOTE(review): qb/rb index the structs' bignum25519 members by position
   (qb[0]=ysubx, qb[1]=xaddy; rb[2]=z, rb[3]=t), so correctness depends on
   the declared field order of ge25519_niels / ge25519_p1p1; signbit selects
   which of y±x is multiplied by which, and which of z/t receives the ±c
   adjustment. */
static void
ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, unsigned char signbit) {
	const bignum25519 *qb = (const bignum25519 *)q;
	bignum25519 *rb = (bignum25519 *)r;
	bignum25519 ALIGN(16) a,b,c;
	packed64bignum25519 ALIGN(16) ab, yx, aybx;
	packed32bignum25519 ALIGN(16) bd, ac, bdac;

	curve25519_sub(a, p->y, p->x);
	curve25519_add(b, p->y, p->x);
	curve25519_tangle64(ab, a, b);
	curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
	curve25519_mul_packed64(aybx, ab, yx);
	curve25519_untangle64(a, b, aybx);
	curve25519_add(r->y, b, a);
	curve25519_add_reduce(r->t, p->z, p->z);
	curve25519_mul(c, p->t, q->t2d);
	curve25519_copy(r->z, r->t);
	curve25519_add(rb[2+signbit], rb[2+signbit], c);
	curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
	curve25519_tangle32(ac, a, c);
	curve25519_sub_packed32(bdac, bd, ac);
	curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
}
/* Mixed addition p + q (or p - q when signbit == 1) with a precomputed
   pniels point (ysubx, xaddy, z, t2d), producing a completed result.
   NOTE(review): like ge25519_nielsadd2_p1p1, qb/rb index struct members by
   position and rely on the declared field order. */
static void
ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, unsigned char signbit) {
	const bignum25519 *qb = (const bignum25519 *)q;
	bignum25519 *rb = (bignum25519 *)r;
	bignum25519 ALIGN(16) a,b,c;
	packed64bignum25519 ALIGN(16) ab, yx, aybx, zt, zt2d, tc;
	packed32bignum25519 ALIGN(16) bd, ac, bdac;

	curve25519_sub(a, p->y, p->x);
	curve25519_add(b, p->y, p->x);
	curve25519_tangle64(ab, a, b);
	curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
	curve25519_mul_packed64(aybx, ab, yx);
	curve25519_untangle64(a, b, aybx);
	curve25519_add(r->y, b, a);
	/* t = 2*z1*z2 and c = t1*t2d2, packed into one multiply */
	curve25519_tangle64(zt, p->z, p->t);
	curve25519_tangle64(zt2d, q->z, q->t2d);
	curve25519_mul_packed64(tc, zt, zt2d);
	curve25519_untangle64(r->t, c, tc);
	curve25519_add_reduce(r->t, r->t, r->t);
	curve25519_copy(r->z, r->t);
	curve25519_add(rb[2+signbit], rb[2+signbit], c);
	curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
	curve25519_tangle32(ac, a, c);
	curve25519_sub_packed32(bdac, bd, ac);
	curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
}
/* r = 2p, in full extended coordinates */
static void
ge25519_double(ge25519 *r, const ge25519 *p) {
	ge25519_p1p1 ALIGN(16) tmp;
	ge25519_double_p1p1(&tmp, p);
	ge25519_p1p1_to_full(r, &tmp);
}
/* r = p + q, in full extended coordinates */
STATIC void
ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {
	ge25519_p1p1 ALIGN(16) tmp;
	ge25519_add_p1p1(&tmp, p, q);
	ge25519_p1p1_to_full(r, &tmp);
}
/* r = 2p, in partial (x, y, z) coordinates; r->t is not computed */
static void
ge25519_double_partial(ge25519 *r, const ge25519 *p) {
	ge25519_p1p1 ALIGN(16) tmp;
	ge25519_double_p1p1(&tmp, p);
	ge25519_p1p1_to_partial(r, &tmp);
}
/* In-place mixed addition: r = r + q for a precomputed niels point q,
   going all the way back to full extended coordinates. */
static void
ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
	packed64bignum25519 ALIGN(16) ab, yx, aybx, eg, ff, hh, xz, ty;
	packed32bignum25519 ALIGN(16) bd, ac, bdac;
	bignum25519 ALIGN(16) a,b,c,d,e,f,g,h;

	/* a = (y-x)*ysubx, b = (y+x)*xaddy, packed */
	curve25519_sub(a, r->y, r->x);
	curve25519_add(b, r->y, r->x);
	curve25519_tangle64(ab, a, b);
	curve25519_tangle64(yx, q->ysubx, q->xaddy);
	curve25519_mul_packed64(aybx, ab, yx);
	curve25519_untangle64(a, b, aybx);
	curve25519_add(h, b, a);
	curve25519_add_reduce(d, r->z, r->z);
	curve25519_mul(c, r->t, q->t2d);
	curve25519_add(g, d, c); /* d is reduced, so no need for after_basic */
	curve25519_tangle32(bd, b, d);
	curve25519_tangle32(ac, a, c);
	curve25519_sub_packed32(bdac, bd, ac); /* d is reduced, so no need for after_basic */
	curve25519_untangle32(e, f, bdac);

	/* x = e*f, z = g*f, t = e*h, y = g*h, as two packed multiplies */
	curve25519_tangle64(eg, e, g);
	curve25519_tangleone64(ff, f);
	curve25519_mul_packed64(xz, eg, ff);
	curve25519_untangle64(r->x, r->z, xz);
	curve25519_tangleone64(hh, h);
	curve25519_mul_packed64(ty, eg, hh);
	curve25519_untangle64(r->t, r->y, ty);
}
/* r = p + q with both result and q in precomputed pniels form */
static void
ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
	ge25519_p1p1 ALIGN(16) sum;
	ge25519 ALIGN(16) full;
	ge25519_pnielsadd_p1p1(&sum, p, q, 0);
	ge25519_p1p1_to_full(&full, &sum);
	ge25519_full_to_pniels(r, &full);
}
/*
pack & unpack
*/
/* Compress p to the 32-byte wire format: the affine y coordinate with the
   low bit of affine x stored in the top bit of the last byte. Performs one
   field inversion to de-projectivize. */
STATIC void
ge25519_pack(unsigned char r[32], const ge25519 *p) {
	bignum25519 ALIGN(16) tx, ty, zi;
	unsigned char parity[32];
	curve25519_recip(zi, p->z);
	curve25519_mul(tx, p->x, zi);
	curve25519_mul(ty, p->y, zi);
	curve25519_contract(r, ty);
	curve25519_contract(parity, tx);
	r[31] ^= ((parity[0] & 1) << 7);
}
/* Decompress a 32-byte point and negate it: recovers x from y via
   x^2 = (y^2-1)/(dy^2+1), then picks the root whose low bit DIFFERS from
   the encoded sign bit (hence "negative"). Returns 1 on success, 0 if p is
   not a valid point encoding. Variable time. */
STATIC int
ge25519_unpack_negative_vartime(ge25519 *r, const unsigned char p[32]) {
	static const bignum25519 ALIGN(16) one = {1};
	static const unsigned char zero[32] = {0};
	unsigned char parity = p[31] >> 7;
	unsigned char check[32];
	bignum25519 ALIGN(16) t, root, num, den, d3;

	curve25519_expand(r->y, p);
	curve25519_copy(r->z, one);
	curve25519_square_times(num, r->y, 1); /* x = y^2 */
	curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
	curve25519_sub_reduce(num, num, r->z); /* x = y^2 - 1 */
	curve25519_add(den, den, r->z); /* den = dy^2 + 1 */

	/* Computation of sqrt(num/den) */
	/* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
	curve25519_square_times(t, den, 1);
	curve25519_mul(d3, t, den);
	curve25519_square_times(r->x, d3, 1);
	curve25519_mul(r->x, r->x, den);
	curve25519_mul(r->x, r->x, num);
	curve25519_pow_two252m3(r->x, r->x);

	/* 2. computation of r->x = t * num * den^3 */
	curve25519_mul(r->x, r->x, d3);
	curve25519_mul(r->x, r->x, num);

	/* 3. Check if either of the roots works: x^2*den == num, or, after
	   multiplying by sqrt(-1), x^2*den == -num; otherwise p is invalid */
	curve25519_square_times(t, r->x, 1);
	curve25519_mul(t, t, den);
	curve25519_copy(root, t);
	curve25519_sub_reduce(root, root, num);
	curve25519_contract(check, root);
	if (!ed25519_verify(check, zero, 32)) {
		curve25519_add_reduce(t, t, num);
		curve25519_contract(check, t);
		if (!ed25519_verify(check, zero, 32))
			return 0;
		curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
	}

	/* select the NEGATED root: flip x when its parity matches the sign bit */
	curve25519_contract(check, r->x);
	if ((check[0] & 1) == parity) {
		curve25519_copy(t, r->x);
		curve25519_neg(r->x, t);
	}
	curve25519_mul(r->t, r->x, r->y);
	return 1;
}
/*
scalarmults
*/
/* set r to the neutral element (x, y, z, t) = (0, 1, 1, 0) */
DONNA_INLINE static void ge25519_set_neutral(ge25519 *r)
{
	memset(r, 0, sizeof(*r));
	r->y[0] = 1;
	r->z[0] = 1;
}
#define S1_SWINDOWSIZE 5
#define S1_TABLE_SIZE (1<<(S1_SWINDOWSIZE-2))
#define S2_SWINDOWSIZE 7
#define S2_TABLE_SIZE (1<<(S2_SWINDOWSIZE-2))
/* computes [s1]p1 + [s2]base, variable time (verification only).
   Uses sliding windows: a runtime table of odd multiples of p1 (window
   S1_SWINDOWSIZE) and the static table of basepoint multiples (window
   S2_SWINDOWSIZE). Consistency fix: use the ge25519_set_neutral helper
   instead of duplicating the neutral-element initialization inline. */
STATIC void
ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
	signed char slide1[256], slide2[256];
	ge25519_pniels ALIGN(16) pre1[S1_TABLE_SIZE];
	ge25519 ALIGN(16) d1;
	ge25519_p1p1 ALIGN(16) t;
	int32_t i;

	/* signed sliding-window recodings of both scalars */
	contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
	contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);

	/* pre1[k] = (2k+1)*p1 */
	ge25519_double(&d1, p1);
	ge25519_full_to_pniels(pre1, p1);
	for (i = 0; i < S1_TABLE_SIZE - 1; i++)
		ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);

	/* set neutral */
	ge25519_set_neutral(r);

	/* skip leading zero digits, then double-and-add from the top */
	i = 255;
	while ((i >= 0) && !(slide1[i] | slide2[i]))
		i--;

	for (; i >= 0; i--) {
		ge25519_double_p1p1(&t, r);

		if (slide1[i]) {
			ge25519_p1p1_to_full(r, &t);
			ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (unsigned char)slide1[i] >> 7);
		}

		if (slide2[i]) {
			ge25519_p1p1_to_full(r, &t);
			ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (unsigned char)slide2[i] >> 7);
		}

		ge25519_p1p1_to_partial(r, &t);
	}
}
#ifndef MM16
# define MM16 __attribute__((aligned(16)))
#endif
/* computes [s1]p1, variable time (verification only), using a sliding
   window over a runtime table of odd multiples of p1.
   Consistency fix: use the ge25519_set_neutral helper instead of
   duplicating the neutral-element initialization inline. */
STATIC void
ge25519_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1) {
	signed char slide1[256];
	ge25519_pniels MM16 pre1[S1_TABLE_SIZE];
	ge25519 MM16 d1;
	ge25519_p1p1 MM16 t;
	int32_t i;

	/* signed sliding-window recoding of s1 */
	contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);

	/* pre1[k] = (2k+1)*p1 */
	ge25519_double(&d1, p1);
	ge25519_full_to_pniels(pre1, p1);
	for (i = 0; i < S1_TABLE_SIZE - 1; i++)
		ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);

	/* set neutral */
	ge25519_set_neutral(r);

	/* skip leading zero digits, then double-and-add from the top */
	i = 255;
	while ((i >= 0) && !slide1[i])
		i--;

	for (; i >= 0; i--) {
		ge25519_double_p1p1(&t, r);

		if (slide1[i]) {
			ge25519_p1p1_to_full(r, &t);
			ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (unsigned char)slide1[i] >> 7);
		}

		ge25519_p1p1_to_partial(r, &t);
	}
}
/* Uniform-access conditional move: walks [p, n) in steps of `stride` longs,
   keeping a running copy of 4 longs that is replaced exactly when the cursor
   equals `pos`; the final 4 longs are written to r. Every candidate is read
   so the memory access pattern is independent of `pos` (the pointer-equality
   selects are expected to compile to branch-free cmov — presumably; confirm
   on the target compiler).
   Fix: removed the unused local `i` (triggered -Wunused-variable). */
DONNA_INLINE static void ge25519_cmove_stride4(long * r, long * p, long * pos, long * n, int stride) {
	long x0 = p[0], x1 = p[1], x2 = p[2], x3 = p[3], y0, y1, y2, y3;
	for (p += stride; p < n; p += stride) {
		y0 = p[0];
		y1 = p[1];
		y2 = p[2];
		y3 = p[3];
		x0 = (p == pos) ? y0 : x0;
		x1 = (p == pos) ? y1 : x1;
		x2 = (p == pos) ? y2 : x2;
		x3 = (p == pos) ? y3 : x3;
	}
	r[0] = x0;
	r[1] = x1;
	r[2] = x2;
	r[3] = x3;
}
#define HAS_CMOVE_STRIDE4
/* Constant-memory-access copy: *r = p[pos], reading all n table entries.
   With the stride-4 helper, each group of 4 longs of the pniels struct is
   selected across the whole table in one uniform pass.
   NOTE(review): `i < sizeof(...)/sizeof(long)` compares signed int with
   size_t (sign-compare warning); harmless for these sizes. */
STATIC void ge25519_move_conditional_pniels_array(ge25519_pniels * r, const ge25519_pniels * p, int pos, int n) {
#ifdef HAS_CMOVE_STRIDE4
	int i;
	for(i=0; i<sizeof(ge25519_pniels)/sizeof(long); i+=4) {
		ge25519_cmove_stride4(((long*)r)+i,
				      ((long*)p)+i,
				      ((long*)(p+pos))+i,
				      ((long*)(p+n))+i,
				      sizeof(ge25519_pniels)/sizeof(long));
	}
#else
	int i;
	for(i=0; i<n; i++) {
		ge25519_move_conditional_pniels(r, p+i, pos==i);
	}
#endif
}
/* Constant-memory-access copy of one packed 96-byte niels entry: *r = p[pos],
   reading all n entries via the stride-4 helper.
   NOTE(review): the (long*) casts drop const from p and reinterpret both the
   packed byte table and the niels struct as arrays of long; r must have room
   for 96 bytes. */
STATIC void ge25519_move_conditional_niels_array(ge25519_niels * r, const uint8_t p[8][96], int pos, int n) {
	int i;
	for(i=0; i<96/sizeof(long); i+=4) {
		ge25519_cmove_stride4(((long*)r)+i,
				      ((long*)p)+i,
				      ((long*)(p+pos))+i,
				      ((long*)(p+n))+i,
				      96/sizeof(long));
	}
}
/* computes [s1]p1, constant time: fixed 4-bit signed-digit windows.
   contract256_window4_modm recodes s1 into 64 digits in [-8, 8]; the table
   pre1[k] = k*p1 for k = 0..8 is scanned with uniform memory access for
   every digit, and the digit's sign is applied inside the addition. */
STATIC void ge25519_scalarmult(ge25519 *r, const ge25519 *p1, const bignum256modm s1) {
	signed char slide1[64];
	ge25519_pniels MM16 pre1[9];
	ge25519_pniels MM16 pre;
	ge25519 MM16 d1, r1;
	ge25519_p1p1 MM16 t;
	int32_t i, j;

	contract256_window4_modm(slide1, s1);

	/* set neutral */
	ge25519_set_neutral(r);

	/* pre1[0] = 0, pre1[1] = p1, pre1[k] = pre1[k-2] + 2*p1 */
	ge25519_full_to_pniels(pre1, r);
	ge25519_full_to_pniels(pre1+1, p1);
	ge25519_double(&d1, p1);
	ge25519_full_to_pniels(pre1+2, &d1);
	for (i = 0; i < 7; i++) {
		ge25519_pnielsadd(&pre1[i+2], &d1, &pre1[i]);
	}

	/* process digits high to low: r = 16*r + slide1[i]*p1 */
	for (i = 63; i >= 0; i--) {
		int k=abs(slide1[i]);
		ge25519_double_partial(r, r);
		ge25519_double_partial(r, r);
		ge25519_double_partial(r, r);
		ge25519_double_p1p1(&t, r);
		/* uniform-access table lookup of pre1[k] */
		ge25519_move_conditional_pniels_array(&pre, pre1, k, 9);
		ge25519_p1p1_to_full(r, &t);
		ge25519_pnielsadd_p1p1(&t, r, &pre, (unsigned char)slide1[i] >> 7);
		ge25519_p1p1_to_partial(r, &t);
	}
}
#if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
/* Portable (non-asm) constant-time table lookup: loads table[pos][|b|-1]
   into *t, or the neutral niels element (ysubx = xaddy = 1, t2d = 0) when
   b == 0, then applies the sign of b by swapping ysubx/xaddy and negating
   t2d under a mask. */
static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	bignum25519 ALIGN(16) neg;
	uint32_t sign = (uint32_t)((unsigned char)b >> 7); /* 1 iff b < 0 */
	uint32_t mask = ~(sign - 1);
	uint32_t u = (b + mask) ^ mask;                    /* u = |b| */
	uint32_t i;

	/* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
	uint8_t ALIGN(16) packed[96] = {0};
	packed[0] = 1;
	packed[32] = 1;

	/* uniform-access select; u-1 wraps to a huge value when u == 0, so no
	   entry matches and the neutral initialization is kept */
	ge25519_move_conditional_niels_array(packed, &table[pos*8], u-1, 8);

	/* expand in to t */
	curve25519_expand(t->ysubx, packed + 0);
	curve25519_expand(t->xaddy, packed + 32);
	curve25519_expand(t->t2d  , packed + 64);

	/* adjust for sign */
	curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
	curve25519_neg(neg, t->t2d);
	curve25519_swap_conditional(t->t2d, neg, sign);
}
#endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
/* Constant-time basepoint scalar multiplication r = [s]B using the 96-byte
   packed niels multiples table. The scalar is recoded into 64 signed 4-bit
   digits; odd-indexed digits are accumulated first, the partial result is
   multiplied by 16 via four doublings, then even-indexed digits are added. */
STATIC void
ge25519_scalarmult_base_niels(ge25519 *r, const uint8_t table[256][96], const bignum256modm s) {
	signed char b[64];
	uint32_t i;
	ge25519_niels ALIGN(16) t;

	contract256_window4_modm(b, s);

	/* seed r from the first odd digit: x = xaddy - ysubx, y = xaddy + ysubx,
	   z = 2, t = t2d (niels form unpacked back to extended coordinates) */
	ge25519_scalarmult_base_choose_niels(&t, table, 0, b[1]);
	curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
	curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
	memset(r->z, 0, sizeof(bignum25519));
	r->z[0] = 2;
	curve25519_copy(r->t, t.t2d);
	for (i = 3; i < 64; i += 2) {
		ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
		ge25519_nielsadd2(r, &t);
	}

	/* multiply the odd-digit accumulator by 16 */
	ge25519_double_partial(r, r);
	ge25519_double_partial(r, r);
	ge25519_double_partial(r, r);
	ge25519_double(r, r);

	/* even digits; the b[0] entry's t2d is rescaled by d — see the upstream
	   ed25519-donna derivation (NOTE: not re-derived here) */
	ge25519_scalarmult_base_choose_niels(&t, table, 0, b[0]);
	curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
	ge25519_nielsadd2(r, &t);
	for(i = 2; i < 64; i += 2) {
		ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
		ge25519_nielsadd2(r, &t);
	}
}
/* r = [s]B using the standard precomputed basepoint multiples table */
STATIC void ge25519_scalarmult_base(ge25519 *r, const bignum256modm s) {
	ge25519_scalarmult_base_niels(r, ge25519_niels_base_multiples, s);
}

View File

@ -1,106 +0,0 @@
/* Platform identification: sets OS_*, COMPILER_*, CPU_* and CPU_64BITS
   macros from predefined compiler/OS macros, and provides fixed-width
   integer typedefs for pre-C99 MSVC.
   Fix: the OSX test used bitwise `&` between defined() operators; it works
   only because defined() evaluates to 0/1 — changed to logical `&&`. */

/* os */
#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
	#define OS_WINDOWS
#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
	#define OS_SOLARIS
#else
	#include <sys/param.h> /* need this to define BSD */
	#define OS_NIX
	#if defined(__linux__)
		#define OS_LINUX
	#elif defined(BSD)
		#define OS_BSD
		#if defined(MACOS_X) || (defined(__APPLE__) && defined(__MACH__))
			#define OS_OSX
		#elif defined(macintosh) || defined(Macintosh)
			#define OS_MAC
		#elif defined(__OpenBSD__)
			#define OS_OPENBSD
		#endif
	#endif
#endif

/* compiler */
#if defined(_MSC_VER)
	#define COMPILER_MSVC
#endif
#if defined(__ICC)
	#define COMPILER_INTEL
#endif
#if defined(__GNUC__)
	#if (__GNUC__ >= 3)
		#define COMPILER_GCC ((__GNUC__ * 10000) + (__GNUC_MINOR__ * 100) + (__GNUC_PATCHLEVEL__))
	#else
		/* __GNUC_PATCHLEVEL__ predates gcc 3 */
		#define COMPILER_GCC ((__GNUC__ * 10000) + (__GNUC_MINOR__ * 100)                        )
	#endif
#endif
#if defined(__PATHCC__)
	#define COMPILER_PATHCC
#endif
#if defined(__clang__)
	#define COMPILER_CLANG ((__clang_major__ * 10000) + (__clang_minor__ * 100) + (__clang_patchlevel__))
#endif

/* cpu */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
	#define CPU_X86_64
#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
	#define CPU_X86 500
	#ifdef __SSE2__
		#define ED25519_SSE2
	#endif
#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
	#define CPU_X86 400
#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
	#define CPU_X86 300
#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
	#define CPU_IA64
#endif

#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
	#define CPU_SPARC
	#if defined(__sparcv9)
		#define CPU_SPARC64
	#endif
#endif

#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
	#define CPU_PPC
	#if defined(_ARCH_PWR7)
		#define CPU_POWER7
	#elif defined(__64BIT__)
		#define CPU_PPC64
	#else
		#define CPU_PPC32
	#endif
#endif

#if defined(__hppa__) || defined(__hppa)
	#define CPU_HPPA
#endif

#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
	#define CPU_ALPHA
#endif

/* 64 bit cpu */
#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
	#define CPU_64BITS
#endif

/* pre-C99 MSVC lacks <stdint.h>; provide the fixed-width typedefs */
#if defined(COMPILER_MSVC)
	typedef signed char int8_t;
	typedef unsigned char uint8_t;
	typedef signed short int16_t;
	typedef unsigned short uint16_t;
	typedef signed int int32_t;
	typedef unsigned int uint32_t;
	typedef signed __int64 int64_t;
	typedef unsigned __int64 uint64_t;
#else
	#include <stdint.h>
#endif

View File

@ -1,137 +1,27 @@
#include "ed25519-donna-portable-identify.h"
#define mul32x32_64(a,b) (((uint64_t)(a))*(b))
/* platform */
#if defined(COMPILER_MSVC)
#include <intrin.h>
#if !defined(_DEBUG)
#undef mul32x32_64
#define mul32x32_64(a,b) __emulu(a,b)
#endif
#undef inline
#define inline __forceinline
#define DONNA_INLINE __forceinline
#define DONNA_NOINLINE __declspec(noinline)
#undef ALIGN
#define ALIGN(x) __declspec(align(x))
#define ROTL32(a,b) _rotl(a,b)
#define ROTR32(a,b) _rotr(a,b)
#else
#include <sys/param.h>
#define DONNA_INLINE inline __attribute__((always_inline))
#define DONNA_NOINLINE __attribute__((noinline))
#undef ALIGN
#define ALIGN(x) __attribute__((aligned(x)))
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
#endif
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/* uint128_t */
#if defined(CPU_64BITS) && !defined(ED25519_FORCE_32BIT)
#if defined(COMPILER_CLANG) && (COMPILER_CLANG >= 30100)
#define HAVE_NATIVE_UINT128
typedef unsigned __int128 uint128_t;
#elif defined(COMPILER_MSVC)
#define HAVE_UINT128
typedef struct uint128_t {
uint64_t lo, hi;
} uint128_t;
#define mul64x64_128(out,a,b) out.lo = _umul128(a,b,&out.hi);
#define shr128_pair(out,hi,lo,shift) out = __shiftright128(lo, hi, shift);
#define shl128_pair(out,hi,lo,shift) out = __shiftleft128(lo, hi, shift);
#define shr128(out,in,shift) shr128_pair(out, in.hi, in.lo, shift)
#define shl128(out,in,shift) shl128_pair(out, in.hi, in.lo, shift)
#define add128(a,b) { uint64_t p = a.lo; a.lo += b.lo; a.hi += b.hi + (a.lo < p); }
#define add128_64(a,b) { uint64_t p = a.lo; a.lo += b; a.hi += (a.lo < p); }
#define lo128(a) (a.lo)
#define hi128(a) (a.hi)
#elif defined(COMPILER_GCC) && !defined(HAVE_NATIVE_UINT128)
#if defined(__SIZEOF_INT128__)
#define HAVE_NATIVE_UINT128
typedef unsigned __int128 uint128_t;
#elif (COMPILER_GCC >= 40400)
#define HAVE_NATIVE_UINT128
typedef unsigned uint128_t __attribute__((mode(TI)));
#elif defined(CPU_X86_64)
#define HAVE_UINT128
typedef struct uint128_t {
uint64_t lo, hi;
} uint128_t;
#define mul64x64_128(out,a,b) __asm__ ("mulq %3" : "=a" (out.lo), "=d" (out.hi) : "a" (a), "rm" (b));
#define shr128_pair(out,hi,lo,shift) __asm__ ("shrdq %2,%1,%0" : "+r" (lo) : "r" (hi), "J" (shift)); out = lo;
#define shl128_pair(out,hi,lo,shift) __asm__ ("shldq %2,%1,%0" : "+r" (hi) : "r" (lo), "J" (shift)); out = hi;
#define shr128(out,in,shift) shr128_pair(out,in.hi, in.lo, shift)
#define shl128(out,in,shift) shl128_pair(out,in.hi, in.lo, shift)
#define add128(a,b) __asm__ ("addq %4,%2; adcq %5,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b.lo), "rm" (b.hi) : "cc");
#define add128_64(a,b) __asm__ ("addq %4,%2; adcq $0,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b) : "cc");
#define lo128(a) (a.lo)
#define hi128(a) (a.hi)
#endif
#endif
#define DONNA_INLINE inline __attribute__((always_inline))
#define DONNA_NOINLINE __attribute__((noinline))
#undef ALIGN
#define ALIGN(x) __attribute__((aligned(x)))
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
#if defined(HAVE_NATIVE_UINT128)
#define HAVE_UINT128
#define mul64x64_128(out,a,b) out = (uint128_t)a * b;
#define shr128_pair(out,hi,lo,shift) out = (uint64_t)((((uint128_t)hi << 64) | lo) >> (shift));
#define shl128_pair(out,hi,lo,shift) out = (uint64_t)(((((uint128_t)hi << 64) | lo) << (shift)) >> 64);
#define shr128(out,in,shift) out = (uint64_t)(in >> (shift));
#define shl128(out,in,shift) out = (uint64_t)((in << shift) >> 64);
#define add128(a,b) a += b;
#define add128_64(a,b) a += (uint64_t)b;
#define lo128(a) ((uint64_t)a)
#define hi128(a) ((uint64_t)(a >> 64))
#endif
#if !defined(HAVE_UINT128)
#error Need a uint128_t implementation!
#endif
#endif
/* endian */
#if !defined(ED25519_OPENSSLRNG)
static inline void U32TO8_LE(unsigned char *p, const uint32_t v) {
	/* serialize v as 4 little-endian bytes */
	for (int i = 0; i < 4; i++)
		p[i] = (unsigned char)(v >> (8 * i));
}
#endif
#if !defined(HAVE_UINT128)
/* deserialize 4 little-endian bytes into a uint32_t.
   Fix: the previous text repeated the p[0] term and dropped the p[1]
   term, leaving the parentheses unbalanced (a compile error). */
static inline uint32_t U8TO32_LE(const unsigned char *p) {
	return
	(((uint32_t)(p[0])      ) |
	 ((uint32_t)(p[1]) <<  8) |
	 ((uint32_t)(p[2]) << 16) |
	 ((uint32_t)(p[3]) << 24));
}
#else
static inline uint64_t U8TO64_LE(const unsigned char *p) {
	/* deserialize 8 little-endian bytes into a uint64_t */
	uint64_t v = 0;
	for (int i = 7; i >= 0; i--)
		v = (v << 8) | (uint64_t)p[i];
	return v;
}
static inline void U64TO8_LE(unsigned char *p, const uint64_t v) {
	/* serialize v as 8 little-endian bytes */
	for (int i = 0; i < 8; i++)
		p[i] = (unsigned char)(v >> (8 * i));
}
#endif
#include <stdlib.h>
#include <string.h>

View File

@ -11,52 +11,11 @@
#include "ed25519-donna-portable.h"
#if defined(ED25519_SSE2)
#else
#if defined(HAVE_UINT128) && !defined(ED25519_FORCE_32BIT)
#define ED25519_64BIT
#else
#define ED25519_32BIT
#endif
#endif
#if !defined(ED25519_NO_INLINE_ASM)
/* detect extra features first so un-needed functions can be disabled throughout */
#if defined(ED25519_SSE2)
#if defined(COMPILER_GCC) && defined(CPU_X86)
#define ED25519_GCC_32BIT_SSE_CHOOSE
#elif defined(COMPILER_GCC) && defined(CPU_X86_64)
#define ED25519_GCC_64BIT_SSE_CHOOSE
#endif
#else
#if defined(CPU_X86_64)
#if defined(COMPILER_GCC)
#if defined(ED25519_64BIT)
#define ED25519_GCC_64BIT_X86_CHOOSE
#else
#define ED25519_GCC_64BIT_32BIT_CHOOSE
#endif
#endif
#endif
#endif
#endif
#if defined(ED25519_SSE2)
#include "curve25519-donna-sse2.h"
#elif defined(ED25519_64BIT)
#include "curve25519-donna-64bit.h"
#else
#include "curve25519-donna-32bit.h"
#endif
#include "curve25519-donna-32bit.h"
#include "curve25519-donna-helpers.h"
/* separate uint128 check for 64 bit sse2 */
#if defined(HAVE_UINT128) && !defined(ED25519_FORCE_32BIT)
#include "modm-donna-64bit.h"
#else
#include "modm-donna-32bit.h"
#endif
#include "modm-donna-32bit.h"
typedef unsigned char hash_512bits[64];
@ -96,20 +55,6 @@ typedef struct ge25519_pniels_t {
#include "ed25519-donna-basepoint-table.h"
#if defined(ED25519_64BIT)
#include "ed25519-donna-64bit-tables.h"
#include "ed25519-donna-64bit-x86.h"
#else
#include "ed25519-donna-32bit-tables.h"
#include "ed25519-donna-64bit-x86-32bit.h"
#endif
#if defined(ED25519_SSE2)
#include "ed25519-donna-32bit-sse2.h"
#include "ed25519-donna-64bit-sse2.h"
#include "ed25519-donna-impl-sse2.h"
#else
#include "ed25519-donna-impl-base.h"
#endif
#include "ed25519-donna-32bit-tables.h"
#include "ed25519-donna-impl-base.h"

View File

@ -1,219 +0,0 @@
#if defined(ED25519_REFHASH)
/* reference/slow SHA-512. really, do not use this */
#define HASH_BLOCK_SIZE 128
#define HASH_DIGEST_SIZE 64
/* incremental SHA-512 state for the reference (ED25519_REFHASH) path */
typedef struct sha512_state_t {
uint64_t H[8]; /* chaining values */
uint64_t T[2]; /* 128-bit processed-bits counter: T[0] low, T[1] high */
uint32_t leftover; /* bytes buffered in `buffer`, always < HASH_BLOCK_SIZE */
uint8_t buffer[HASH_BLOCK_SIZE]; /* pending partial input block */
} sha512_state;
typedef sha512_state ed25519_hash_context;
static const uint64_t sha512_constants[80] = {
0x428a2f98d728ae22ull, 0x7137449123ef65cdull, 0xb5c0fbcfec4d3b2full, 0xe9b5dba58189dbbcull,
0x3956c25bf348b538ull, 0x59f111f1b605d019ull, 0x923f82a4af194f9bull, 0xab1c5ed5da6d8118ull,
0xd807aa98a3030242ull, 0x12835b0145706fbeull, 0x243185be4ee4b28cull, 0x550c7dc3d5ffb4e2ull,
0x72be5d74f27b896full, 0x80deb1fe3b1696b1ull, 0x9bdc06a725c71235ull, 0xc19bf174cf692694ull,
0xe49b69c19ef14ad2ull, 0xefbe4786384f25e3ull, 0x0fc19dc68b8cd5b5ull, 0x240ca1cc77ac9c65ull,
0x2de92c6f592b0275ull, 0x4a7484aa6ea6e483ull, 0x5cb0a9dcbd41fbd4ull, 0x76f988da831153b5ull,
0x983e5152ee66dfabull, 0xa831c66d2db43210ull, 0xb00327c898fb213full, 0xbf597fc7beef0ee4ull,
0xc6e00bf33da88fc2ull, 0xd5a79147930aa725ull, 0x06ca6351e003826full, 0x142929670a0e6e70ull,
0x27b70a8546d22ffcull, 0x2e1b21385c26c926ull, 0x4d2c6dfc5ac42aedull, 0x53380d139d95b3dfull,
0x650a73548baf63deull, 0x766a0abb3c77b2a8ull, 0x81c2c92e47edaee6ull, 0x92722c851482353bull,
0xa2bfe8a14cf10364ull, 0xa81a664bbc423001ull, 0xc24b8b70d0f89791ull, 0xc76c51a30654be30ull,
0xd192e819d6ef5218ull, 0xd69906245565a910ull, 0xf40e35855771202aull, 0x106aa07032bbd1b8ull,
0x19a4c116b8d2d0c8ull, 0x1e376c085141ab53ull, 0x2748774cdf8eeb99ull, 0x34b0bcb5e19b48a8ull,
0x391c0cb3c5c95a63ull, 0x4ed8aa4ae3418acbull, 0x5b9cca4f7763e373ull, 0x682e6ff3d6b2b8a3ull,
0x748f82ee5defb2fcull, 0x78a5636f43172f60ull, 0x84c87814a1f0ab72ull, 0x8cc702081a6439ecull,
0x90befffa23631e28ull, 0xa4506cebde82bde9ull, 0xbef9a3f7b2c67915ull, 0xc67178f2e372532bull,
0xca273eceea26619cull, 0xd186b8c721c0c207ull, 0xeada7dd6cde0eb1eull, 0xf57d4f7fee6ed178ull,
0x06f067aa72176fbaull, 0x0a637dc5a2c898a6ull, 0x113f9804bef90daeull, 0x1b710b35131c471bull,
0x28db77f523047d84ull, 0x32caab7b40c72493ull, 0x3c9ebe0a15c9bebcull, 0x431d67c49c100d4cull,
0x4cc5d4becb3e42b6ull, 0x597f299cfc657e2aull, 0x5fcb6fab3ad6faecull, 0x6c44198c4a475817ull
};
static uint64_t
sha512_ROTR64(uint64_t x, int k) {
	/* rotate x right by k bits; all call sites use 0 < k < 64 */
	uint64_t low = x >> k;
	uint64_t high = x << (64 - k);
	return high | low;
}
static uint64_t
sha512_LOAD64_BE(const uint8_t *p) {
	/* deserialize 8 big-endian bytes into a uint64_t */
	uint64_t v = 0;
	for (int i = 0; i < 8; i++)
		v = (v << 8) | (uint64_t)p[i];
	return v;
}
static void
sha512_STORE64_BE(uint8_t *p, uint64_t v) {
	/* serialize v as 8 big-endian bytes */
	for (int i = 0; i < 8; i++)
		p[i] = (uint8_t)(v >> (56 - 8 * i));
}
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
#define S0(x) (sha512_ROTR64(x, 28) ^ sha512_ROTR64(x, 34) ^ sha512_ROTR64(x, 39))
#define S1(x) (sha512_ROTR64(x, 14) ^ sha512_ROTR64(x, 18) ^ sha512_ROTR64(x, 41))
#define G0(x) (sha512_ROTR64(x, 1) ^ sha512_ROTR64(x, 8) ^ (x >> 7))
#define G1(x) (sha512_ROTR64(x, 19) ^ sha512_ROTR64(x, 61) ^ (x >> 6))
#define W0(in,i) (sha512_LOAD64_BE(&in[i * 8]))
#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
#define STEP(i) \
t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha512_constants[i] + w[i]; \
r[7] = r[6]; \
r[6] = r[5]; \
r[5] = r[4]; \
r[4] = r[3] + t0; \
r[3] = r[2]; \
r[2] = r[1]; \
r[1] = r[0]; \
r[0] = t0 + t1;
/* Compress `blocks` full 128-byte blocks from `in` into S, updating
   the chaining values H and the processed-bits counter T. */
static void
sha512_blocks(sha512_state *S, const uint8_t *in, size_t blocks) {
uint64_t r[8], w[80], t0, t1;
size_t i;
for (i = 0; i < 8; i++) r[i] = S->H[i];
while (blocks--) {
/* message schedule: 16 big-endian input words expanded to 80 */
for (i = 0; i < 16; i++) { w[i] = W0(in, i); }
for (i = 16; i < 80; i++) { w[i] = W1(i); }
/* 80 compression rounds (STEP macro) */
for (i = 0; i < 80; i++) { STEP(i); }
/* feed-forward into the chaining state */
for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }
/* 128-bit bit counter; T[1] absorbs the carry when T[0] wraps */
S->T[0] += HASH_BLOCK_SIZE * 8;
S->T[1] += (!S->T[0]) ? 1 : 0;
in += HASH_BLOCK_SIZE;
}
}
/* initialize S with the SHA-512 initial hash values and zero counters */
static void
ed25519_hash_init(sha512_state *S) {
S->H[0] = 0x6a09e667f3bcc908ull;
S->H[1] = 0xbb67ae8584caa73bull;
S->H[2] = 0x3c6ef372fe94f82bull;
S->H[3] = 0xa54ff53a5f1d36f1ull;
S->H[4] = 0x510e527fade682d1ull;
S->H[5] = 0x9b05688c2b3e6c1full;
S->H[6] = 0x1f83d9abfb41bd6bull;
S->H[7] = 0x5be0cd19137e2179ull;
S->T[0] = 0;
S->T[1] = 0;
S->leftover = 0;
}
/* absorb `inlen` bytes into the hash state, buffering any trailing
   partial block in S->buffer for the next call or for finalization */
static void
ed25519_hash_update(sha512_state *S, const uint8_t *in, size_t inlen) {
size_t blocks, want;
/* handle the previous data */
if (S->leftover) {
/* top up the buffered partial block; compress only if it fills */
want = (HASH_BLOCK_SIZE - S->leftover);
want = (want < inlen) ? want : inlen;
memcpy(S->buffer + S->leftover, in, want);
S->leftover += (uint32_t)want;
if (S->leftover < HASH_BLOCK_SIZE)
return;
in += want;
inlen -= want;
sha512_blocks(S, S->buffer, 1);
}
/* handle the current data */
blocks = (inlen & ~(HASH_BLOCK_SIZE - 1));
S->leftover = (uint32_t)(inlen - blocks);
if (blocks) {
sha512_blocks(S, in, blocks / HASH_BLOCK_SIZE);
in += blocks;
}
/* handle leftover data */
if (S->leftover)
memcpy(S->buffer, in, S->leftover);
}
/* finalize: append the 0x80 pad byte and zero fill, place the 128-bit
   message bit count big-endian in the last 16 bytes, compress, and
   write the 64-byte digest big-endian into `hash` */
static void
ed25519_hash_final(sha512_state *S, uint8_t *hash) {
uint64_t t0 = S->T[0] + (S->leftover * 8), t1 = S->T[1];
S->buffer[S->leftover] = 0x80;
if (S->leftover <= 111) {
/* pad byte and length fit in the current block */
memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover);
} else {
/* no room for the length: pad out this block, then a length-only block */
memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover);
sha512_blocks(S, S->buffer, 1);
memset(S->buffer, 0, 112);
}
sha512_STORE64_BE(S->buffer + 112, t1);
sha512_STORE64_BE(S->buffer + 120, t0);
sha512_blocks(S, S->buffer, 1);
sha512_STORE64_BE(&hash[ 0], S->H[0]);
sha512_STORE64_BE(&hash[ 8], S->H[1]);
sha512_STORE64_BE(&hash[16], S->H[2]);
sha512_STORE64_BE(&hash[24], S->H[3]);
sha512_STORE64_BE(&hash[32], S->H[4]);
sha512_STORE64_BE(&hash[40], S->H[5]);
sha512_STORE64_BE(&hash[48], S->H[6]);
sha512_STORE64_BE(&hash[56], S->H[7]);
}
/* one-shot convenience: SHA-512 of in[0..inlen) into the 64-byte `hash` */
static void
ed25519_hash(uint8_t *hash, const uint8_t *in, size_t inlen) {
ed25519_hash_context ctx;
ed25519_hash_init(&ctx);
ed25519_hash_update(&ctx, in, inlen);
ed25519_hash_final(&ctx, hash);
}
#elif defined(ED25519_CUSTOMHASH)
#include "ed25519-hash-custom.h"
#else
#include <openssl/sha.h>
typedef SHA512_CTX ed25519_hash_context;
/* thin adapters mapping the donna hash interface onto OpenSSL SHA-512 */
static void
ed25519_hash_init(ed25519_hash_context *ctx) {
SHA512_Init(ctx);
}
static void
ed25519_hash_update(ed25519_hash_context *ctx, const uint8_t *in, size_t inlen) {
SHA512_Update(ctx, in, inlen);
}
static void
ed25519_hash_final(ed25519_hash_context *ctx, uint8_t *hash) {
SHA512_Final(hash, ctx);
}
static void
ed25519_hash(uint8_t *hash, const uint8_t *in, size_t inlen) {
SHA512(in, inlen, hash);
}
#endif

View File

@ -1,12 +0,0 @@
/*
a custom randombytes must implement:
void ED25519_FN(ed25519_randombytes_unsafe) (void *p, size_t len);
ed25519_randombytes_unsafe is used by the batch verification function
to create random scalars
*/
#include "rand.h"
#define ed25519_randombytes_unsafe(p, len) random_buffer((uint8_t *)(p), (len))

View File

@ -1,91 +0,0 @@
#if defined(ED25519_TEST)
/*
ISAAC+ "variant", the paper is not clear on operator precedence and other
things. This is the "first in, first out" option!
Not threadsafe or securely initialized, only for deterministic testing
*/
typedef struct isaacp_state_t {
uint32_t state[256];
unsigned char buffer[1024];
uint32_t a, b, c;
size_t left;
} isaacp_state;
#define isaacp_step(offset, mix) \
x = mm[i + offset]; \
a = (a ^ (mix)) + (mm[(i + offset + 128) & 0xff]); \
y = (a ^ b) + mm[(x >> 2) & 0xff]; \
mm[i + offset] = y; \
b = (x + a) ^ mm[(y >> 10) & 0xff]; \
U32TO8_LE(out + (i + offset) * 4, b);
/* run one ISAAC+ round over the 256-word state, refilling the 1024-byte
   output buffer (isaacp_step writes 4 bytes per step via U32TO8_LE) */
static void
isaacp_mix(isaacp_state *st) {
uint32_t i, x, y;
uint32_t a = st->a, b = st->b, c = st->c;
uint32_t *mm = st->state;
unsigned char *out = st->buffer;
c = c + 1;
b = b + c;
for (i = 0; i < 256; i += 4) {
isaacp_step(0, ROTL32(a,13))
isaacp_step(1, ROTR32(a, 6))
isaacp_step(2, ROTL32(a, 2))
isaacp_step(3, ROTR32(a,16))
}
st->a = a;
st->b = b;
st->c = c;
st->left = 1024; /* full buffer of fresh keystream */
}
/* copy `len` keystream bytes into p, consuming the buffer front-to-back
   and remixing whenever it runs empty */
static void
isaacp_random(isaacp_state *st, void *p, size_t len) {
size_t use;
unsigned char *c = (unsigned char *)p;
while (len) {
use = (len > st->left) ? st->left : len;
memcpy(c, st->buffer + (sizeof(st->buffer) - st->left), use);
st->left -= use;
c += use;
len -= use;
if (!st->left)
isaacp_mix(st);
}
}
/* test-only RNG: ISAAC+ stream started from an all-zero state, so the
   output is deterministic; not securely seeded and not thread-safe */
void
ED25519_FN(ed25519_randombytes_unsafe) (void *p, size_t len) {
static int initialized = 0;
static isaacp_state rng;
if (!initialized) {
memset(&rng, 0, sizeof(rng));
isaacp_mix(&rng);
isaacp_mix(&rng);
initialized = 1;
}
isaacp_random(&rng, p, len);
}
#elif defined(ED25519_CUSTOMRANDOM)
#include "ed25519-randombytes-custom.h"
#else
#include <openssl/rand.h>
void
ED25519_FN(ed25519_randombytes_unsafe) (void *p, size_t len) {
RAND_bytes(p, (int) len);
}
#endif

View File

@ -17,8 +17,7 @@
#include "ed25519-donna.h"
#include "ed25519.h"
#include "ed25519-randombytes.h"
#include "ed25519-hash.h"
#include "ed25519-hash-custom.h"
/*
Generates a (extsk[0..31]) and aExt (extsk[32..63])
@ -116,8 +115,6 @@ ED25519_FN(ed25519_sign_open) (const unsigned char *m, size_t mlen, const ed2551
return ed25519_verify(RS, checkR, 32) ? 0 : -1;
}
#include "ed25519-donna-batchverify.h"
/*
Fast Curve25519 basepoint scalar multiplication
*/

View File

@ -19,8 +19,6 @@ void ed25519_sign(const unsigned char *m, size_t mlen, const ed25519_secret_key
int ed25519_sign_open_batch(const unsigned char **m, size_t *mlen, const unsigned char **pk, const unsigned char **RS, size_t num, int *valid);
void ed25519_randombytes_unsafe(void *out, size_t count);
void curved25519_scalarmult_basepoint(curved25519_key pk, const curved25519_key e);
#if defined(__cplusplus)

View File

@ -1,348 +0,0 @@
/*
Public domain by Andrew M. <liquidsun@gmail.com>
*/
/*
Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
k = 32
b = 1 << 8 = 256
m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
*/
#define bignum256modm_bits_per_limb 56
#define bignum256modm_limb_size 5
typedef uint64_t bignum256modm_element_t;
typedef bignum256modm_element_t bignum256modm[5];
static const bignum256modm modm_m = {
0x12631a5cf5d3ed,
0xf9dea2f79cd658,
0x000000000014de,
0x00000000000000,
0x00000010000000
};
static const bignum256modm modm_mu = {
0x9ce5a30a2c131b,
0x215d086329a7ed,
0xffffffffeb2106,
0xffffffffffffff,
0x00000fffffffff
};
static bignum256modm_element_t
lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
	/* constant-time compare: returns 1 when a < b, else 0
	   (the borrow of a - b appears in the top bit) */
	bignum256modm_element_t borrow = a - b;
	return borrow >> 63;
}
/* r = r mod m for r < 2m: compute t = r - m with limb borrows, then
   constant-time select r (if r < m) or t (otherwise) via `mask` */
static void
reduce256_modm(bignum256modm r) {
bignum256modm t;
bignum256modm_element_t b = 0, pb, mask;
/* t = r - m */
pb = 0;
pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;
pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;
pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;
pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b;
pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32));
/* keep r if r was smaller than m */
mask = b - 1; /* all-ones when a final borrow occurred (r < m) */
r[0] ^= mask & (r[0] ^ t[0]);
r[1] ^= mask & (r[1] ^ t[1]);
r[2] ^= mask & (r[2] ^ t[2]);
r[3] ^= mask & (r[3] ^ t[3]);
r[4] ^= mask & (r[4] ^ t[4]);
}
/* Barrett reduction of x = q1*2^248 + (r1 mod 2^264) modulo the group
   order m (cf. HAC Algorithm 14.42), on 56-bit limbs:
   q3 = (mu * q1) >> 264, r2 = (q3 * m) mod 2^264, r = r1 - r2,
   followed by (at most two) conditional subtractions of m. */
static void
barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
bignum256modm q3, r2;
uint128_t c, mul;
bignum256modm_element_t f, b, pb;
/* q1 = x >> 248 = 264 bits = 5 56 bit elements
q2 = mu * q1
q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);
mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)
f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);
mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)
f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);
mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)
f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);
mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)
f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);
mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)
f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);
q3[4] |= (f << 16);
/* r2 = (q3 * m) mod 2^264, low limbs only */
mul64x64_128(c, modm_m[0], q3[0])
r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)
r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)
r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)
r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)
r2[4] = lo128(c) & 0x0000ffffffffff;
/* r = (r1 - r2) mod 2^264, with limb borrows */
pb = 0;
pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;
pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;
pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;
pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;
pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40));
/* Barrett leaves r < 3m; two conditional subtractions finish */
reduce256_modm(r);
reduce256_modm(r);
}
/* r = (x + y) mod m, limbs carried left-to-right, then reduced */
STATIC void add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
	bignum256modm_element_t carry = 0;
	for (int i = 0; i < 4; i++) {
		carry += x[i] + y[i];
		r[i] = carry & 0xffffffffffffff;
		carry >>= 56;
	}
	r[4] = carry + x[4] + y[4];
	reduce256_modm(r);
}
/* r = (x * y) mod m: schoolbook multiply into ~520 bits, split as
   r1 = product mod 2^264 and q1 = product >> 248, then Barrett reduce */
STATIC void mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
bignum256modm q1, r1;
uint128_t c, mul;
bignum256modm_element_t f;
mul64x64_128(c, x[0], y[0])
f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul)
f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul)
f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul)
f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);
mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul)
/* from here on the high part of the product feeds q1 */
f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul)
f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul)
f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul)
f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
mul64x64_128(c, x[4], y[4]) add128_64(c, f)
f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
q1[4] |= (f << 32);
barrett_reduce256_modm(r, q1, r1);
}
/* expand a little-endian byte string (len <= 64 bytes) into 5x56-bit
   limbs; inputs of 32+ bytes are Barrett-reduced mod the group order */
STATIC void expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
unsigned char work[64] = {0};
/* NOTE(review): only x[0..7] are used; the array is oversized at 16 */
bignum256modm_element_t x[16];
bignum256modm q1;
memcpy(work, in, len);
x[0] = U8TO64_LE(work + 0);
x[1] = U8TO64_LE(work + 8);
x[2] = U8TO64_LE(work + 16);
x[3] = U8TO64_LE(work + 24);
x[4] = U8TO64_LE(work + 32);
x[5] = U8TO64_LE(work + 40);
x[6] = U8TO64_LE(work + 48);
x[7] = U8TO64_LE(work + 56);
/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
out[0] = ( x[0]) & 0xffffffffffffff;
out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;
/* under 252 bits, no need to reduce */
if (len < 32)
return;
/* q1 = x >> 248 = 264 bits */
q1[0] = ((x[ 3] >> 56) | (x[ 4] << 8)) & 0xffffffffffffff;
q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;
q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;
q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;
q1[4] = ((x[ 7] >> 24) );
barrett_reduce256_modm(out, q1, out);
}
/* expand 32 little-endian bytes into 5x56-bit limbs WITHOUT reducing
   mod m (caller is responsible for any needed reduction) */
STATIC void expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
bignum256modm_element_t x[4];
x[0] = U8TO64_LE(in + 0);
x[1] = U8TO64_LE(in + 8);
x[2] = U8TO64_LE(in + 16);
x[3] = U8TO64_LE(in + 24);
out[0] = ( x[0]) & 0xffffffffffffff;
out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;
out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
out[4] = ((x[ 3] >> 32) ) & 0x000000ffffffff;
}
/* contract 5x56-bit limbs back into 32 little-endian bytes */
STATIC void contract256_modm(unsigned char out[32], const bignum256modm in) {
U64TO8_LE(out + 0, (in[0] ) | (in[1] << 56));
U64TO8_LE(out + 8, (in[1] >> 8) | (in[2] << 48));
U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));
U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));
}
/* split the scalar into 64 radix-16 digits, then recode them into the
   signed range [-8,7] by propagating carries upward; r[63] absorbs the
   final carry */
STATIC void contract256_window4_modm(signed char r[64], const bignum256modm in) {
char carry;
signed char *quads = r;
bignum256modm_element_t i, j, v, m;
/* 14 nibbles per 56-bit limb, 8 from the 32-bit top limb */
for (i = 0; i < 5; i++) {
v = in[i];
m = (i == 4) ? 8 : 14;
for (j = 0; j < m; j++) {
*quads++ = (v & 15);
v >>= 4;
}
}
/* making it signed */
carry = 0;
for(i = 0; i < 63; i++) {
r[i] += carry;
r[i+1] += (r[i] >> 4);
r[i] &= 15;
carry = (r[i] >> 3); /* digit >= 8: borrow 16 and carry 1 upward */
r[i] -= (carry << 4);
}
r[63] += carry;
}
/* recode the 256-bit scalar s into signed sliding-window form: first
   write its binary expansion into r, then greedily merge neighboring
   bits into signed digits bounded by m = 2^(windowsize-1)-1, zeroing
   the absorbed positions */
STATIC void contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
int i,j,k,b;
int m = (1 << (windowsize - 1)) - 1, soplen = 256;
signed char *bits = r;
bignum256modm_element_t v;
/* first put the binary expansion into r */
for (i = 0; i < 4; i++) {
v = s[i];
for (j = 0; j < 56; j++, v >>= 1)
*bits++ = (v & 1);
}
v = s[4];
for (j = 0; j < 32; j++, v >>= 1)
*bits++ = (v & 1);
/* Making it sliding window */
for (j = 0; j < soplen; j++) {
if (!r[j])
continue;
for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
if ((r[j] + (r[j + b] << b)) <= m) {
r[j] += r[j + b] << b;
r[j + b] = 0;
} else if ((r[j] - (r[j + b] << b)) >= -m) {
r[j] -= r[j + b] << b;
/* went negative: ripple a +1 carry into the next zero slot */
for (k = j + b; k < soplen; k++) {
if (!r[k]) {
r[k] = 1;
break;
}
r[k] = 0;
}
} else if (r[j + b]) {
break;
}
}
}
}
/*
helpers for batch verifcation, are allowed to be vartime
*/
/* out = a - b, a must be larger than b */
/* out = a - b, a must be larger than b */
/* vartime limb subtraction over limbsize+1 limbs; the switch cases
   deliberately fall through so exactly limbsize+1 limbs are processed */
STATIC void sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
size_t i = 0;
bignum256modm_element_t carry = 0;
switch (limbsize) {
case 4: out[i] = (a[i] - b[i]) ; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
case 0:
default: out[i] = (a[i] - b[i]) - carry;
}
}
/* is a < b */
/* vartime a < b over limbsize+1 limbs: returns the final borrow of
   a - b (cases fall through deliberately) */
STATIC int lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
size_t i = 0;
bignum256modm_element_t t, carry = 0;
switch (limbsize) {
case 4: t = (a[i] - b[i]) ; carry = (t >> 63); i++;
case 3: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
case 2: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
case 1: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
case 0: t = (a[i] - b[i]) - carry; carry = (t >> 63);
}
return (int)carry;
}
/* is a <= b */
/* vartime a <= b over limbsize+1 limbs: computes b - a and returns 1
   when no final borrow occurred (cases fall through deliberately) */
STATIC int lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
size_t i = 0;
bignum256modm_element_t t, carry = 0;
switch (limbsize) {
case 4: t = (b[i] - a[i]) ; carry = (t >> 63); i++;
case 3: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
case 2: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
case 1: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
case 0: t = (b[i] - a[i]) - carry; carry = (t >> 63);
}
return (int)!carry;
}
/* is a == 0 */
STATIC int iszero256_modm_batch(const bignum256modm a) {
size_t i;
for (i = 0; i < 5; i++)
if (a[i])
return 0;
return 1;
}
/* is a == 1 */
/* is a == 1: limb 0 must equal 1 and all higher limbs must be zero */
STATIC int isone256_modm_batch(const bignum256modm a) {
	if (a[0] != 1)
		return 0;
	for (size_t i = 1; i < 5; i++) {
		if (a[i] != 0)
			return 0;
	}
	return 1;
}
/* can a fit in to (at most) 128 bits */
/* can a fit in (at most) 128 bits? With 56-bit limbs, bits 128..255
   live in limb 4, limb 3, and the top 40 bits of limb 2 */
STATIC int isatmost128bits256_modm_batch(const bignum256modm a) {
	if (a[4] | a[3])
		return 0;
	return (a[2] & 0xffffffffff0000) == 0;
}