diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl new file mode 100644 index 000000000..92551d5e5 --- /dev/null +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -0,0 +1,1820 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * + * Furthermore, since elliptic curve operations are highly researched and optimized, + * we've consulted a lot of online resources to implement this, including several papers and + * example code. + * + * Credits where credits are due: there are a lot of nice projects that explain and/or optimize + * elliptic curve operations (especially elliptic curve multiplications by a scalar). + * + * We want to shout out following projects, which were quite helpful when implementing this: + * - secp256k1 by Pieter Wuille (https://github.com/bitcoin-core/secp256k1/, MIT) + * - secp256k1-cl by hhanh00 (https://github.com/hhanh00/secp256k1-cl/, MIT) + * - ec_pure_c by masterzorag (https://github.com/masterzorag/ec_pure_c/) + * - ecc-gmp by leivaburto (https://github.com/leivaburto/ecc-gmp) + * - micro-ecc by Ken MacKay (https://github.com/kmackay/micro-ecc/, BSD) + * - curve_example by willem (https://gist.github.com/nlitsme/c9031c7b9bf6bb009e5a) + * - py_ecc by Vitalik Buterin (https://github.com/ethereum/py_ecc/, MIT) + * + * + * Some BigNum operations are implemented similar to micro-ecc which is licensed under these terms: + * Copyright 2014 Ken MacKay, 2-Clause BSD License + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * ATTENTION: this code is NOT meant to be used in security critical environments that are at risk + * of side-channel or timing attacks etc, its only purpose is to make it work fast for GPGPU + * (OpenCL/CUDA). Some attack vectors like side-channel and timing-attacks might be possible, + * because of some optimizations used within this code (non-constant time etc). + */ + +/* + * Implementation considerations: + * point double and point add are implemented similar to algorithms mentioned in this 2011 paper: + * http://eprint.iacr.org/2011/338.pdf + * (Fast and Regular Algorithms for Scalar Multiplication over Elliptic Curves by Matthieu Rivain) + * + * In theory we could use the Jacobian Co-Z enhancement to get rid of the larger buffer caused by + * the z coordinates (and in this way reduce register pressure etc). + * For the Co-Z improvement there are a lot of fast algorithms, but we might still be faster + * with this implementation (b/c we allow non-constant time) without the Brier/Joye Montgomery-like + * ladder. Of course, this claim would need to be verified and tested to see which one is faster + * for our specific scenario at the end. 
+ * + * A speedup could also be possible by using scalars converted to (w)NAF (non-adjacent form) or by + * just using the windowed (precomputed zi) method or similar improvements: + * The general idea of w-NAF would be to pre-compute some zi coefficients like below to reduce the + * costly point additions by using a non-binary ("signed") number system (values other than just + * 0 and 1, but ranging from -2^(w-1)-1 to 2^(w-1)-1). This would work best with the left-to-right + * binary algorithm such that we could just add zi * P when adding point P (pre-compute all the + * possible zi * P values because the x/y coordinates are known before the kernel starts): + * + * // Example with window size w = 2 (i.e. mod 4 => & 3): + * // 173 => 1 0 -1 0 -1 0 -1 0 1 = 2^8 - 2^6 - 2^4 - 2^2 + 1 + * int e = 0b10101101; // 173 + * int z[8 + 1] = { 0 }; // our zi/di, we need one extra slot to make the substract work + * + * int i = 0; + * + * while (e) + * { + * if (e & 1) + * { + * // for window size w = 3 it would be: + * // => 2^(w-0) = 2^3 = 8 + * // => 2^(w-1) = 2^2 = 4 + * + * int bit; // = 2 - (e & 3) for w = 2 + * + * if ((e & 3) >= 2) // e % 4 == e & 3, use (e & 7) >= 4 for w = 3 + * bit = (e & 3) - 4; // (e & 7) - 8 for w = 3 + * else + * bit = e & 3; // e & 7 for w = 3 + * + * z[i] = bit; + * e -= bit; + * } + * + * e >>= 1; // e / 2 + * i++; + * } +*/ + +#include "inc_ecc_secp256k1.h" + +DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8]) +{ + u32 c = 0; // carry/borrow + + for (u32 i = 0; i < 8; i++) + { + const u32 diff = a[i] - b[i] - c; + + if (diff != a[i]) c = (diff > a[i]); + + r[i] = diff; + } + + return c; +} + +DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8]) +{ + u32 c = 0; // carry/borrow + + for (u32 i = 0; i < 8; i++) + { + const u32 t = a[i] + b[i] + c; + + if (t != a[i]) c = (t < a[i]); + + r[i] = t; + } + + return c; +} + +DECLSPEC void sub_mod (u32 r[8], const u32 a[8], const u32 b[8]) +{ + const u32 c = sub (r, a, b); // carry + + 
if (c) + { + u32 t[8]; + + t[0] = SECP256K1_P0; + t[1] = SECP256K1_P1; + t[2] = SECP256K1_P2; + t[3] = SECP256K1_P3; + t[4] = SECP256K1_P4; + t[5] = SECP256K1_P5; + t[6] = SECP256K1_P6; + t[7] = SECP256K1_P7; + + add (r, r, t); + } +} + +DECLSPEC void add_mod (u32 r[8], const u32 a[8], const u32 b[8]) +{ + const u32 c = add (r, a, b); // carry + + /* + * Modulo operation: + */ + + // note: we could have an early exit in case of c == 1 => sub () + + u32 t[8]; + + t[0] = SECP256K1_P0; + t[1] = SECP256K1_P1; + t[2] = SECP256K1_P2; + t[3] = SECP256K1_P3; + t[4] = SECP256K1_P4; + t[5] = SECP256K1_P5; + t[6] = SECP256K1_P6; + t[7] = SECP256K1_P7; + + // check if modulo operation is needed + + u32 mod = 1; + + if (c == 0) + { + for (int i = 7; i >= 0; i--) + { + if (r[i] < t[i]) + { + mod = 0; + + break; // or return ! (check if faster) + } + + if (r[i] > t[i]) break; + } + } + + if (mod == 1) + { + sub (r, r, t); + } +} + +DECLSPEC void mod_512 (u32 n[16]) +{ + // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo): + // the modulus is the secp256k1 group order + + // ATTENTION: for this function the byte-order is reversed (most significant bytes + // at the left) + + /* + the general modulo by shift and substract code (a = a % b): + + x = b; + + t = a >> 1; + + while (x <= t) x <<= 1; + + while (a >= b) + { + if (a >= x) a -= x; + + x >>= 1; + } + + return a; // remainder + */ + + u32 a[16]; + + a[ 0] = n[ 0]; + a[ 1] = n[ 1]; + a[ 2] = n[ 2]; + a[ 3] = n[ 3]; + a[ 4] = n[ 4]; + a[ 5] = n[ 5]; + a[ 6] = n[ 6]; + a[ 7] = n[ 7]; + a[ 8] = n[ 8]; + a[ 9] = n[ 9]; + a[10] = n[10]; + a[11] = n[11]; + a[12] = n[12]; + a[13] = n[13]; + a[14] = n[14]; + a[15] = n[15]; + + u32 b[16]; + + b[ 0] = 0x00000000; + b[ 1] = 0x00000000; + b[ 2] = 0x00000000; + b[ 3] = 0x00000000; + b[ 4] = 0x00000000; + b[ 5] = 0x00000000; + b[ 6] = 0x00000000; + b[ 7] = 0x00000000; + b[ 8] = SECP256K1_N7; + b[ 9] = SECP256K1_N6; + b[10] = SECP256K1_N5; + b[11] = SECP256K1_N4; + 
b[12] = SECP256K1_N3; + b[13] = SECP256K1_N2; + b[14] = SECP256K1_N1; + b[15] = SECP256K1_N0; + + /* + * Start: + */ + + // x = b (but with a fast "shift" trick to avoid the while loop) + + u32 x[16]; + + x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the + x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1 + x[ 2] = b[10]; + x[ 3] = b[11]; + x[ 4] = b[12]; + x[ 5] = b[13]; + x[ 6] = b[14]; + x[ 7] = b[15]; + x[ 8] = 0x00000000; + x[ 9] = 0x00000000; + x[10] = 0x00000000; + x[11] = 0x00000000; + x[12] = 0x00000000; + x[13] = 0x00000000; + x[14] = 0x00000000; + x[15] = 0x00000000; + + // a >= b + + while (a[0] >= b[0]) + { + const u32 l1 = (a[ 0] < b[ 0]) << 0 + | (a[ 1] < b[ 1]) << 1 + | (a[ 2] < b[ 2]) << 2 + | (a[ 3] < b[ 3]) << 3 + | (a[ 4] < b[ 4]) << 4 + | (a[ 5] < b[ 5]) << 5 + | (a[ 6] < b[ 6]) << 6 + | (a[ 7] < b[ 7]) << 7 + | (a[ 8] < b[ 8]) << 8 + | (a[ 9] < b[ 9]) << 9 + | (a[10] < b[10]) << 10 + | (a[11] < b[11]) << 11 + | (a[12] < b[12]) << 12 + | (a[13] < b[13]) << 13 + | (a[14] < b[14]) << 14 + | (a[15] < b[15]) << 15; + + const u32 e1 = (a[ 0] == b[ 0]) << 0 + | (a[ 1] == b[ 1]) << 1 + | (a[ 2] == b[ 2]) << 2 + | (a[ 3] == b[ 3]) << 3 + | (a[ 4] == b[ 4]) << 4 + | (a[ 5] == b[ 5]) << 5 + | (a[ 6] == b[ 6]) << 6 + | (a[ 7] == b[ 7]) << 7 + | (a[ 8] == b[ 8]) << 8 + | (a[ 9] == b[ 9]) << 9 + | (a[10] == b[10]) << 10 + | (a[11] == b[11]) << 11 + | (a[12] == b[12]) << 12 + | (a[13] == b[13]) << 13 + | (a[14] == b[14]) << 14 + | (a[15] == b[15]) << 15; + + if (l1) + { + if (l1 & 0x0001) break; + if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; + if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; + if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; + if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; + if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; + if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; + if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; + if (l1 
& 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; + if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; + if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; + if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; + if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; + if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break; + if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break; + if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break; + } + + // r = x (copy it to have the original values for the subtraction) + + u32 r[16]; + + r[ 0] = x[ 0]; + r[ 1] = x[ 1]; + r[ 2] = x[ 2]; + r[ 3] = x[ 3]; + r[ 4] = x[ 4]; + r[ 5] = x[ 5]; + r[ 6] = x[ 6]; + r[ 7] = x[ 7]; + r[ 8] = x[ 8]; + r[ 9] = x[ 9]; + r[10] = x[10]; + r[11] = x[11]; + r[12] = x[12]; + r[13] = x[13]; + r[14] = x[14]; + r[15] = x[15]; + + // x >>= 1 + + x[15] = x[15] >> 1 | (x[14] & 1) << 31; + x[14] = x[14] >> 1 | (x[13] & 1) << 31; + x[13] = x[13] >> 1 | (x[12] & 1) << 31; + x[12] = x[12] >> 1 | (x[11] & 1) << 31; + x[11] = x[11] >> 1 | (x[10] & 1) << 31; + x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; + x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; + x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; + x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; + x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; + x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; + x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; + x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31; + x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; + x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; + x[ 0] = x[ 0] >> 1; + + // if (a >= r) a -= r; + + const u32 l2 = (a[ 0] < r[ 0]) << 0 + | (a[ 1] < r[ 1]) << 1 + | (a[ 2] < r[ 2]) << 2 + | (a[ 3] < r[ 3]) << 3 + | (a[ 4] < r[ 4]) << 4 + | (a[ 5] < r[ 5]) << 5 + | (a[ 6] < r[ 6]) << 6 + | (a[ 7] < r[ 7]) << 7 + | (a[ 8] < r[ 8]) << 8 + | (a[ 9] < r[ 9]) << 9 + | (a[10] < r[10]) << 10 + | (a[11] < r[11]) << 11 + | (a[12] < r[12]) << 12 + | (a[13] < r[13]) << 13 + | (a[14] < r[14]) << 14 + | (a[15] < r[15]) << 15; + + const u32 e2 = (a[ 0] == r[ 0]) << 0 + | (a[ 1] == r[ 1]) << 1 + | (a[ 2] == r[ 
2]) << 2 + | (a[ 3] == r[ 3]) << 3 + | (a[ 4] == r[ 4]) << 4 + | (a[ 5] == r[ 5]) << 5 + | (a[ 6] == r[ 6]) << 6 + | (a[ 7] == r[ 7]) << 7 + | (a[ 8] == r[ 8]) << 8 + | (a[ 9] == r[ 9]) << 9 + | (a[10] == r[10]) << 10 + | (a[11] == r[11]) << 11 + | (a[12] == r[12]) << 12 + | (a[13] == r[13]) << 13 + | (a[14] == r[14]) << 14 + | (a[15] == r[15]) << 15; + + if (l2) + { + if (l2 & 0x0001) continue; + if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue; + if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; + if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; + if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; + if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; + if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; + if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; + if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; + if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; + if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; + if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; + if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue; + if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue; + if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue; + if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue; + } + + // substract (a -= r): + + r[ 0] = a[ 0] - r[ 0]; + r[ 1] = a[ 1] - r[ 1]; + r[ 2] = a[ 2] - r[ 2]; + r[ 3] = a[ 3] - r[ 3]; + r[ 4] = a[ 4] - r[ 4]; + r[ 5] = a[ 5] - r[ 5]; + r[ 6] = a[ 6] - r[ 6]; + r[ 7] = a[ 7] - r[ 7]; + r[ 8] = a[ 8] - r[ 8]; + r[ 9] = a[ 9] - r[ 9]; + r[10] = a[10] - r[10]; + r[11] = a[11] - r[11]; + r[12] = a[12] - r[12]; + r[13] = a[13] - r[13]; + r[14] = a[14] - r[14]; + r[15] = a[15] - r[15]; + + // take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!) 
+ + if (r[ 1] > a[ 1]) r[ 0]--; + if (r[ 2] > a[ 2]) r[ 1]--; + if (r[ 3] > a[ 3]) r[ 2]--; + if (r[ 4] > a[ 4]) r[ 3]--; + if (r[ 5] > a[ 5]) r[ 4]--; + if (r[ 6] > a[ 6]) r[ 5]--; + if (r[ 7] > a[ 7]) r[ 6]--; + if (r[ 8] > a[ 8]) r[ 7]--; + if (r[ 9] > a[ 9]) r[ 8]--; + if (r[10] > a[10]) r[ 9]--; + if (r[11] > a[11]) r[10]--; + if (r[12] > a[12]) r[11]--; + if (r[13] > a[13]) r[12]--; + if (r[14] > a[14]) r[13]--; + if (r[15] > a[15]) r[14]--; + + a[ 0] = r[ 0]; + a[ 1] = r[ 1]; + a[ 2] = r[ 2]; + a[ 3] = r[ 3]; + a[ 4] = r[ 4]; + a[ 5] = r[ 5]; + a[ 6] = r[ 6]; + a[ 7] = r[ 7]; + a[ 8] = r[ 8]; + a[ 9] = r[ 9]; + a[10] = r[10]; + a[11] = r[11]; + a[12] = r[12]; + a[13] = r[13]; + a[14] = r[14]; + a[15] = r[15]; + } + + n[ 0] = a[ 0]; + n[ 1] = a[ 1]; + n[ 2] = a[ 2]; + n[ 3] = a[ 3]; + n[ 4] = a[ 4]; + n[ 5] = a[ 5]; + n[ 6] = a[ 6]; + n[ 7] = a[ 7]; + n[ 8] = a[ 8]; + n[ 9] = a[ 9]; + n[10] = a[10]; + n[11] = a[11]; + n[12] = a[12]; + n[13] = a[13]; + n[14] = a[14]; + n[15] = a[15]; +} + +DECLSPEC void mul_mod (u32 r[8], const u32 a[8], const u32 b[8]) // TODO get rid of u64 ? 
+{ + u32 t[16] = { 0 }; // we need up to double the space (2 * 8) + + /* + * First start with the basic a * b multiplication: + */ + + u32 t0 = 0; + u32 t1 = 0; + u32 c = 0; + + for (u32 i = 0; i < 8; i++) + { + for (u32 j = 0; j <= i; j++) + { + u64 p = ((u64) a[j]) * b[i - j]; + + u64 d = ((u64) t1) << 32 | t0; + + d += p; + + t0 = (u32) d; + t1 = d >> 32; + + c += d < p; // carry + } + + t[i] = t0; + + t0 = t1; + t1 = c; + + c = 0; + } + + for (u32 i = 8; i < 15; i++) + { + for (u32 j = i - 7; j < 8; j++) + { + u64 p = ((u64) a[j]) * b[i - j]; + + u64 d = ((u64) t1) << 32 | t0; + + d += p; + + t0 = (u32) d; + t1 = d >> 32; + + c += d < p; + } + + t[i] = t0; + + t0 = t1; + t1 = c; + + c = 0; + } + + t[15] = t0; + + + + /* + * Now do the modulo operation: + * (r = t % p) + * + * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf (p.354 or p.9 in that document) + */ + + u32 tmp[16] = { 0 }; + + // c = 0; + + // Note: SECP256K1_P = 2^256 - 2^32 - 977 (0x03d1 = 977) + // multiply t[8]...t[15] by omega: + + for (u32 i = 0, j = 8; i < 8; i++, j++) + { + u64 p = ((u64) 0x03d1) * t[j] + c; + + tmp[i] = (u32) p; + + c = p >> 32; + } + + tmp[8] = c; + + c = add (tmp + 1, tmp + 1, t + 8); // modifies tmp[1]...tmp[8] + + tmp[9] = c; + + + // r = t + tmp + + c = add (r, t, tmp); + + // multiply t[0]...t[7] by omega: + + u32 c2 = 0; + + // memset (t, 0, sizeof (t)); + + for (u32 i = 0, j = 8; i < 8; i++, j++) + { + u64 p = ((u64) 0x3d1) * tmp[j] + c2; + + t[i] = (u32) p; + + c2 = p >> 32; + } + + t[8] = c2; + + c2 = add (t + 1, t + 1, tmp + 8); // modifies t[1]...t[8] + + t[9] = c2; + + + // r = r + t + + c2 = add (r, r, t); + + c += c2; + + t[0] = SECP256K1_P0; + t[1] = SECP256K1_P1; + t[2] = SECP256K1_P2; + t[3] = SECP256K1_P3; + t[4] = SECP256K1_P4; + t[5] = SECP256K1_P5; + t[6] = SECP256K1_P6; + t[7] = SECP256K1_P7; + + for (u32 i = c; i > 0; i--) + { + sub (r, r, t); + } + + for (int i = 7; i >= 0; i--) + { + if (r[i] < t[i]) break; + + if (r[i] > t[i]) + { + sub (r, r, 
t); + + break; + } + } +} + +DECLSPEC void sqrt_mod (u32 r[8]) +{ + // Fermat's Little Theorem + // secp256k1: y^2 = x^3 + 7 % p + // y ^ (p - 1) = 1 + // y ^ (p - 1) = (y^2) ^ ((p - 1) / 2) = 1 => y^2 = (y^2) ^ (((p - 1) / 2) + 1) + // => y = (y^2) ^ ((((p - 1) / 2) + 1) / 2) + // y = (y^2) ^ (((p - 1 + 2) / 2) / 2) = (y^2) ^ ((p + 1) / 4) + + // y1 = (x^3 + 7) ^ ((p + 1) / 4) + // y2 = p - y1 (or y2 = y1 * -1 % p) + + u32 s[8]; + + s[0] = SECP256K1_P0 + 1; // because of (p + 1) / 4 or use add (s, s, 1) + s[1] = SECP256K1_P1; + s[2] = SECP256K1_P2; + s[3] = SECP256K1_P3; + s[4] = SECP256K1_P4; + s[5] = SECP256K1_P5; + s[6] = SECP256K1_P6; + s[7] = SECP256K1_P7; + + u32 t[8] = { 0 }; + + t[0] = 1; + + for (u32 i = 255; i > 1; i--) // we just skip the last 2 multiplications (=> exp / 4) + { + mul_mod (t, t, t); // r * r + + u32 idx = i >> 5; + u32 mask = 1 << (i & 0x1f); + + if (s[idx] & mask) + { + mul_mod (t, t, r); // t * r + } + } + + r[0] = t[0]; + r[1] = t[1]; + r[2] = t[2]; + r[3] = t[3]; + r[4] = t[4]; + r[5] = t[5]; + r[6] = t[6]; + r[7] = t[7]; +} + +// (inverse (a, p) * a) % p == 1 (or think of a * a^-1 = a / a = 1) + +DECLSPEC void inv_mod (u32 a[8]) +{ + // How often does this really happen? 
it should "almost" never happen (but would be safer) + // if ((a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0) return; + + u32 t0[8]; + + t0[0] = a[0]; + t0[1] = a[1]; + t0[2] = a[2]; + t0[3] = a[3]; + t0[4] = a[4]; + t0[5] = a[5]; + t0[6] = a[6]; + t0[7] = a[7]; + + u32 p[8]; + + p[0] = SECP256K1_P0; + p[1] = SECP256K1_P1; + p[2] = SECP256K1_P2; + p[3] = SECP256K1_P3; + p[4] = SECP256K1_P4; + p[5] = SECP256K1_P5; + p[6] = SECP256K1_P6; + p[7] = SECP256K1_P7; + + u32 t1[8]; + + t1[0] = SECP256K1_P0; + t1[1] = SECP256K1_P1; + t1[2] = SECP256K1_P2; + t1[3] = SECP256K1_P3; + t1[4] = SECP256K1_P4; + t1[5] = SECP256K1_P5; + t1[6] = SECP256K1_P6; + t1[7] = SECP256K1_P7; + + u32 t2[8] = { 0 }; + + t2[0] = 0x00000001; + + u32 t3[8] = { 0 }; + + u32 b = (t0[0] != t1[0]) + | (t0[1] != t1[1]) + | (t0[2] != t1[2]) + | (t0[3] != t1[3]) + | (t0[4] != t1[4]) + | (t0[5] != t1[5]) + | (t0[6] != t1[6]) + | (t0[7] != t1[7]); + + while (b) + { + if ((t0[0] & 1) == 0) // even + { + t0[0] = t0[0] >> 1 | t0[1] << 31; + t0[1] = t0[1] >> 1 | t0[2] << 31; + t0[2] = t0[2] >> 1 | t0[3] << 31; + t0[3] = t0[3] >> 1 | t0[4] << 31; + t0[4] = t0[4] >> 1 | t0[5] << 31; + t0[5] = t0[5] >> 1 | t0[6] << 31; + t0[6] = t0[6] >> 1 | t0[7] << 31; + t0[7] = t0[7] >> 1; + + u32 c = 0; + + if (t2[0] & 1) c = add (t2, t2, p); + + t2[0] = t2[0] >> 1 | t2[1] << 31; + t2[1] = t2[1] >> 1 | t2[2] << 31; + t2[2] = t2[2] >> 1 | t2[3] << 31; + t2[3] = t2[3] >> 1 | t2[4] << 31; + t2[4] = t2[4] >> 1 | t2[5] << 31; + t2[5] = t2[5] >> 1 | t2[6] << 31; + t2[6] = t2[6] >> 1 | t2[7] << 31; + t2[7] = t2[7] >> 1 | c << 31; + } + else if ((t1[0] & 1) == 0) + { + t1[0] = t1[0] >> 1 | t1[1] << 31; + t1[1] = t1[1] >> 1 | t1[2] << 31; + t1[2] = t1[2] >> 1 | t1[3] << 31; + t1[3] = t1[3] >> 1 | t1[4] << 31; + t1[4] = t1[4] >> 1 | t1[5] << 31; + t1[5] = t1[5] >> 1 | t1[6] << 31; + t1[6] = t1[6] >> 1 | t1[7] << 31; + t1[7] = t1[7] >> 1; + + u32 c = 0; + + if (t3[0] & 1) c = add (t3, t3, p); + + t3[0] = t3[0] >> 1 | t3[1] 
<< 31; + t3[1] = t3[1] >> 1 | t3[2] << 31; + t3[2] = t3[2] >> 1 | t3[3] << 31; + t3[3] = t3[3] >> 1 | t3[4] << 31; + t3[4] = t3[4] >> 1 | t3[5] << 31; + t3[5] = t3[5] >> 1 | t3[6] << 31; + t3[6] = t3[6] >> 1 | t3[7] << 31; + t3[7] = t3[7] >> 1 | c << 31; + } + else + { + u32 gt = 0; + + for (int i = 7; i >= 0; i--) + { + if (t0[i] > t1[i]) + { + gt = 1; + + break; + } + + if (t0[i] < t1[i]) break; + } + + if (gt) + { + sub (t0, t0, t1); + + t0[0] = t0[0] >> 1 | t0[1] << 31; + t0[1] = t0[1] >> 1 | t0[2] << 31; + t0[2] = t0[2] >> 1 | t0[3] << 31; + t0[3] = t0[3] >> 1 | t0[4] << 31; + t0[4] = t0[4] >> 1 | t0[5] << 31; + t0[5] = t0[5] >> 1 | t0[6] << 31; + t0[6] = t0[6] >> 1 | t0[7] << 31; + t0[7] = t0[7] >> 1; + + u32 lt = 0; + + for (int i = 7; i >= 0; i--) + { + if (t2[i] < t3[i]) + { + lt = 1; + + break; + } + + if (t2[i] > t3[i]) break; + } + + if (lt) add (t2, t2, p); + + sub (t2, t2, t3); + + u32 c = 0; + + if (t2[0] & 1) c = add (t2, t2, p); + + t2[0] = t2[0] >> 1 | t2[1] << 31; + t2[1] = t2[1] >> 1 | t2[2] << 31; + t2[2] = t2[2] >> 1 | t2[3] << 31; + t2[3] = t2[3] >> 1 | t2[4] << 31; + t2[4] = t2[4] >> 1 | t2[5] << 31; + t2[5] = t2[5] >> 1 | t2[6] << 31; + t2[6] = t2[6] >> 1 | t2[7] << 31; + t2[7] = t2[7] >> 1 | c << 31; + } + else + { + sub (t1, t1, t0); + + t1[0] = t1[0] >> 1 | t1[1] << 31; + t1[1] = t1[1] >> 1 | t1[2] << 31; + t1[2] = t1[2] >> 1 | t1[3] << 31; + t1[3] = t1[3] >> 1 | t1[4] << 31; + t1[4] = t1[4] >> 1 | t1[5] << 31; + t1[5] = t1[5] >> 1 | t1[6] << 31; + t1[6] = t1[6] >> 1 | t1[7] << 31; + t1[7] = t1[7] >> 1; + + u32 lt = 0; + + for (int i = 7; i >= 0; i--) + { + if (t3[i] < t2[i]) + { + lt = 1; + + break; + } + + if (t3[i] > t2[i]) break; + } + + if (lt) add (t3, t3, p); + + sub (t3, t3, t2); + + u32 c = 0; + + if (t3[0] & 1) c = add (t3, t3, p); + + t3[0] = t3[0] >> 1 | t3[1] << 31; + t3[1] = t3[1] >> 1 | t3[2] << 31; + t3[2] = t3[2] >> 1 | t3[3] << 31; + t3[3] = t3[3] >> 1 | t3[4] << 31; + t3[4] = t3[4] >> 1 | t3[5] << 31; + t3[5] = t3[5] 
>> 1 | t3[6] << 31; + t3[6] = t3[6] >> 1 | t3[7] << 31; + t3[7] = t3[7] >> 1 | c << 31; + } + } + + // update b: + + b = (t0[0] != t1[0]) + | (t0[1] != t1[1]) + | (t0[2] != t1[2]) + | (t0[3] != t1[3]) + | (t0[4] != t1[4]) + | (t0[5] != t1[5]) + | (t0[6] != t1[6]) + | (t0[7] != t1[7]); + } + + // set result: + + a[0] = t2[0]; + a[1] = t2[1]; + a[2] = t2[2]; + a[3] = t2[3]; + a[4] = t2[4]; + a[5] = t2[5]; + a[6] = t2[6]; + a[7] = t2[7]; +} + +/* + // everything from the formulas below of course MOD the prime: + + // we use this formula: + + X = (3/2 * x^2)^2 - 2 * x * y^2 + Y = (3/2 * x^2) * (x * y^2 - X) - y^4 + Z = y * z + + this is identical to the more frequently used form: + + X = (3 * x^2)^2 - 8 * x * y^2 + Y = 3 * x^2 * (4 * x * y^2 - X) - 8 * y^4 + Z = 2 * y * z +*/ + +DECLSPEC void point_double (u32 x[8], u32 y[8], u32 z[8]) +{ + // How often does this really happen? it should "almost" never happen (but would be safer) + + /* + if ((y[0] | y[1] | y[2] | y[3] | y[4] | y[5] | y[6] | y[7]) == 0) + { + x[0] = 0; + x[1] = 0; + x[2] = 0; + x[3] = 0; + x[4] = 0; + x[5] = 0; + x[6] = 0; + x[7] = 0; + + y[0] = 0; + y[1] = 0; + y[2] = 0; + y[3] = 0; + y[4] = 0; + y[5] = 0; + y[6] = 0; + y[7] = 0; + + z[0] = 0; + z[1] = 0; + z[2] = 0; + z[3] = 0; + z[4] = 0; + z[5] = 0; + z[6] = 0; + z[7] = 0; + + return; + } + */ + + u32 t1[8]; + + t1[0] = x[0]; + t1[1] = x[1]; + t1[2] = x[2]; + t1[3] = x[3]; + t1[4] = x[4]; + t1[5] = x[5]; + t1[6] = x[6]; + t1[7] = x[7]; + + u32 t2[8]; + + t2[0] = y[0]; + t2[1] = y[1]; + t2[2] = y[2]; + t2[3] = y[3]; + t2[4] = y[4]; + t2[5] = y[5]; + t2[6] = y[6]; + t2[7] = y[7]; + + u32 t3[8]; + + t3[0] = z[0]; + t3[1] = z[1]; + t3[2] = z[2]; + t3[3] = z[3]; + t3[4] = z[4]; + t3[5] = z[5]; + t3[6] = z[6]; + t3[7] = z[7]; + + u32 t4[8]; + u32 t5[8]; + u32 t6[8]; + + mul_mod (t4, t1, t1); // t4 = x^2 + + mul_mod (t5, t2, t2); // t5 = y^2 + + mul_mod (t1, t1, t5); // t1 = x*y^2 + + mul_mod (t5, t5, t5); // t5 = t5^2 = y^4 + + // here the z^2 and z^4 is 
not needed for a = 0 + + mul_mod (t3, t2, t3); // t3 = x * z + + add_mod (t2, t4, t4); // t2 = 2 * t4 = 2 * x^2 + add_mod (t4, t4, t2); // t4 = 3 * t4 = 3 * x^2 + + // a * z^4 = 0 * 1^4 = 0 + + // don't discard the least significant bit it's important too! + + u32 c = 0; + + if (t4[0] & 1) + { + u32 t[8]; + + t[0] = SECP256K1_P0; + t[1] = SECP256K1_P1; + t[2] = SECP256K1_P2; + t[3] = SECP256K1_P3; + t[4] = SECP256K1_P4; + t[5] = SECP256K1_P5; + t[6] = SECP256K1_P6; + t[7] = SECP256K1_P7; + + c = add (t4, t4, t); // t4 + SECP256K1_P + } + + // right shift (t4 / 2): + + t4[0] = t4[0] >> 1 | t4[1] << 31; + t4[1] = t4[1] >> 1 | t4[2] << 31; + t4[2] = t4[2] >> 1 | t4[3] << 31; + t4[3] = t4[3] >> 1 | t4[4] << 31; + t4[4] = t4[4] >> 1 | t4[5] << 31; + t4[5] = t4[5] >> 1 | t4[6] << 31; + t4[6] = t4[6] >> 1 | t4[7] << 31; + t4[7] = t4[7] >> 1 | c << 31; + + mul_mod (t6, t4, t4); // t6 = t4^2 = (3/2 * x^2)^2 + + add_mod (t2, t1, t1); // t2 = 2 * t1 + + sub_mod (t6, t6, t2); // t6 = t6 - t2 + sub_mod (t1, t1, t6); // t1 = t1 - t6 + + mul_mod (t4, t4, t1); // t4 = t4 * t1 + + sub_mod (t1, t4, t5); // t1 = t4 - t5 + + // => x = t6, y = t1, z = t3: + + x[0] = t6[0]; + x[1] = t6[1]; + x[2] = t6[2]; + x[3] = t6[3]; + x[4] = t6[4]; + x[5] = t6[5]; + x[6] = t6[6]; + x[7] = t6[7]; + + y[0] = t1[0]; + y[1] = t1[1]; + y[2] = t1[2]; + y[3] = t1[3]; + y[4] = t1[4]; + y[5] = t1[5]; + y[6] = t1[6]; + y[7] = t1[7]; + + z[0] = t3[0]; + z[1] = t3[1]; + z[2] = t3[2]; + z[3] = t3[3]; + z[4] = t3[4]; + z[5] = t3[5]; + z[6] = t3[6]; + z[7] = t3[7]; +} + +DECLSPEC void point_add (u32 x1[8], u32 y1[8], u32 z1[8], const u32 x2[8], const u32 y2[8], const u32 z2[8]) +{ + // How often does this really happen? 
it should "almost" never happen (but would be safer) + + /* + if ((y2[0] | y2[1] | y2[2] | y2[3] | y2[4] | y2[5] | y2[6] | y2[7]) == 0) return; + + if ((y1[0] | y1[1] | y1[2] | y1[3] | y1[4] | y1[5] | y1[6] | y1[7]) == 0) + { + x1[0] = x2[0]; + x1[1] = x2[1]; + x1[2] = x2[2]; + x1[3] = x2[3]; + x1[4] = x2[4]; + x1[5] = x2[5]; + x1[6] = x2[6]; + x1[7] = x2[7]; + + y1[0] = y2[0]; + y1[1] = y2[1]; + y1[2] = y2[2]; + y1[3] = y2[3]; + y1[4] = y2[4]; + y1[5] = y2[5]; + y1[6] = y2[6]; + y1[7] = y2[7]; + + z1[0] = z2[0]; + z1[1] = z2[1]; + z1[2] = z2[2]; + z1[3] = z2[3]; + z1[4] = z2[4]; + z1[5] = z2[5]; + z1[6] = z2[6]; + z1[7] = z2[7]; + + return; + } + */ + + // if x1 == x2 and y2 == y2 and z2 == z2 we need to double instead? + + // x1/y1/z1: + + u32 t1[8]; + + t1[0] = x1[0]; + t1[1] = x1[1]; + t1[2] = x1[2]; + t1[3] = x1[3]; + t1[4] = x1[4]; + t1[5] = x1[5]; + t1[6] = x1[6]; + t1[7] = x1[7]; + + u32 t2[8]; + + t2[0] = y1[0]; + t2[1] = y1[1]; + t2[2] = y1[2]; + t2[3] = y1[3]; + t2[4] = y1[4]; + t2[5] = y1[5]; + t2[6] = y1[6]; + t2[7] = y1[7]; + + u32 t3[8]; + + t3[0] = z1[0]; + t3[1] = z1[1]; + t3[2] = z1[2]; + t3[3] = z1[3]; + t3[4] = z1[4]; + t3[5] = z1[5]; + t3[6] = z1[6]; + t3[7] = z1[7]; + + // x2/y2/z2: + + u32 t4[8]; + + t4[0] = x2[0]; + t4[1] = x2[1]; + t4[2] = x2[2]; + t4[3] = x2[3]; + t4[4] = x2[4]; + t4[5] = x2[5]; + t4[6] = x2[6]; + t4[7] = x2[7]; + + u32 t5[8]; + + t5[0] = y2[0]; + t5[1] = y2[1]; + t5[2] = y2[2]; + t5[3] = y2[3]; + t5[4] = y2[4]; + t5[5] = y2[5]; + t5[6] = y2[6]; + t5[7] = y2[7]; + + u32 t6[8]; + + t6[0] = z2[0]; + t6[1] = z2[1]; + t6[2] = z2[2]; + t6[3] = z2[3]; + t6[4] = z2[4]; + t6[5] = z2[5]; + t6[6] = z2[6]; + t6[7] = z2[7]; + + u32 t7[8]; + + mul_mod (t7, t3, t3); // t7 = z1^2 + mul_mod (t4, t4, t7); // t4 = x2 * z1^2 = B + + mul_mod (t5, t5, t3); // t5 = y2 * z1 + mul_mod (t5, t5, t7); // t5 = y2 * z1^3 = D + + mul_mod (t7, t6, t6); // t7 = z2^2 + + mul_mod (t1, t1, t7); // t1 = x1 * z2^2 + + mul_mod (t2, t2, t6); // t2 = y1 * z2 + 
mul_mod (t2, t2, t7); // t2 = y1 * z2^3 = C + + sub_mod (t1, t1, t4); // t1 = A - B = E + + mul_mod (t3, t6, t3); // t3 = z1 * z2 + mul_mod (t3, t1, t3); // t3 = z1 * z2 * E = Z3 + + sub_mod (t2, t2, t5); // t2 = C - D = F + + mul_mod (t7, t1, t1); // t7 = E^2 + mul_mod (t6, t2, t2); // t6 = F^2 + + mul_mod (t4, t4, t7); // t4 = B * E^2 + mul_mod (t1, t7, t1); // t1 = E^3 + + sub_mod (t6, t6, t1); // t6 = F^2 - E^3 + + add_mod (t7, t4, t4); // t7 = 2 * B * E^2 + + sub_mod (t6, t6, t7); // t6 = F^2 - E^2 - 2 * B * E^2 = X3 + sub_mod (t4, t4, t6); // t4 = B * E^2 - X3 + + mul_mod (t2, t2, t4); // t2 = F * (B * E^2 - X3) + mul_mod (t7, t5, t1); // t7 = D * E^3 + + sub_mod (t7, t2, t7); // t7 = F * (B * E^2 - X3) - D * E^3 = Y3 + + x1[0] = t6[0]; + x1[1] = t6[1]; + x1[2] = t6[2]; + x1[3] = t6[3]; + x1[4] = t6[4]; + x1[5] = t6[5]; + x1[6] = t6[6]; + x1[7] = t6[7]; + + y1[0] = t7[0]; + y1[1] = t7[1]; + y1[2] = t7[2]; + y1[3] = t7[3]; + y1[4] = t7[4]; + y1[5] = t7[5]; + y1[6] = t7[6]; + y1[7] = t7[7]; + + z1[0] = t3[0]; + z1[1] = t3[1]; + z1[2] = t3[2]; + z1[3] = t3[3]; + z1[4] = t3[4]; + z1[5] = t3[5]; + z1[6] = t3[6]; + z1[7] = t3[7]; +} + +DECLSPEC void point_get_coords (secp256k1_t *r, const u32 x[8], const u32 y[8]) +{ + // init the values with x and y: + + u32 x1[8]; + + x1[0] = x[0]; + x1[1] = x[1]; + x1[2] = x[2]; + x1[3] = x[3]; + x1[4] = x[4]; + x1[5] = x[5]; + x1[6] = x[6]; + x1[7] = x[7]; + + u32 y1[8]; + + y1[0] = y[0]; + y1[1] = y[1]; + y1[2] = y[2]; + y1[3] = y[3]; + y1[4] = y[4]; + y1[5] = y[5]; + y1[6] = y[6]; + y1[7] = y[7]; + + u32 t1[8]; + + t1[0] = y[0]; + t1[1] = y[1]; + t1[2] = y[2]; + t1[3] = y[3]; + t1[4] = y[4]; + t1[5] = y[5]; + t1[6] = y[6]; + t1[7] = y[7]; + + // we use jacobian forms and the convertion with z = 1 is basically a NO-OP: + // X = X1 * z^2 = X1, Y = Y1 * z^3 = Y + + // https://eprint.iacr.org/2011/338.pdf + + // initial jacobian doubling + + u32 t2[8]; + u32 t3[8]; + u32 t4[8]; + + mul_mod (t2, x1, x1); // t2 = x1^2 + mul_mod 
(t3, y1, y1); // t3 = y1^2 + + mul_mod (x1, x1, t3); // x1 = x1*y1^2 + + mul_mod (t3, t3, t3); // t3 = t3^2 = y1^4 + + // here the z^2 and z^4 is not needed for a = 0 (and furthermore we have z = 1) + + add_mod (y1, t2, t2); // y1 = 2 * t2 = 2 * x1^2 + add_mod (t2, y1, t2); // t2 = 3 * t2 = 3 * x1^2 + + // a * z^4 = 0 * 1^4 = 0 + + // don't discard the least significant bit it's important too! + + u32 c = 0; + + if (t2[0] & 1) + { + u32 t[8]; + + t[0] = SECP256K1_P0; + t[1] = SECP256K1_P1; + t[2] = SECP256K1_P2; + t[3] = SECP256K1_P3; + t[4] = SECP256K1_P4; + t[5] = SECP256K1_P5; + t[6] = SECP256K1_P6; + t[7] = SECP256K1_P7; + + c = add (t2, t2, t); // t2 + SECP256K1_P + } + + // right shift (t2 / 2): + + t2[0] = t2[0] >> 1 | t2[1] << 31; + t2[1] = t2[1] >> 1 | t2[2] << 31; + t2[2] = t2[2] >> 1 | t2[3] << 31; + t2[3] = t2[3] >> 1 | t2[4] << 31; + t2[4] = t2[4] >> 1 | t2[5] << 31; + t2[5] = t2[5] >> 1 | t2[6] << 31; + t2[6] = t2[6] >> 1 | t2[7] << 31; + t2[7] = t2[7] >> 1 | c << 31; + + mul_mod (t4, t2, t2); // t4 = t2^2 = (3/2*x1^2)^2 + + add_mod (y1, x1, x1); // y1 = 2 * x1_new + + sub_mod (t4, t4, y1); // t4 = t4 - y1_new + sub_mod (x1, x1, t4); // x1 = x1 - t4 + + mul_mod (t2, t2, x1); // t2 = t2 * x1_new + + sub_mod (x1, t2, t3); // x1 = t2 - t3 + + // => X = t4, Y = x1, Z = t1: + // (and t2, t3 can now be safely reused) + + // convert to affine coordinates (to save some bytes copied around) and store it: + + u32 inv[8]; + + inv[0] = t1[0]; + inv[1] = t1[1]; + inv[2] = t1[2]; + inv[3] = t1[3]; + inv[4] = t1[4]; + inv[5] = t1[5]; + inv[6] = t1[6]; + inv[7] = t1[7]; + + inv_mod (inv); + + mul_mod (t2, inv, inv); // t2 = inv^2 + mul_mod (t3, inv, t2); // t3 = inv^3 + + // output to y1 + + mul_mod (t3, t3, x1); + + r->xy[31] = t3[7]; + r->xy[30] = t3[6]; + r->xy[29] = t3[5]; + r->xy[28] = t3[4]; + r->xy[27] = t3[3]; + r->xy[26] = t3[2]; + r->xy[25] = t3[1]; + r->xy[24] = t3[0]; + + // output to x1 + + mul_mod (t3, t2, t4); + + r->xy[23] = t3[7]; + r->xy[22] = 
t3[6]; + r->xy[21] = t3[5]; + r->xy[20] = t3[4]; + r->xy[19] = t3[3]; + r->xy[18] = t3[2]; + r->xy[17] = t3[1]; + r->xy[16] = t3[0]; + + // also store orginal x/y: + + r->xy[15] = y[7]; + r->xy[14] = y[6]; + r->xy[13] = y[5]; + r->xy[12] = y[4]; + r->xy[11] = y[3]; + r->xy[10] = y[2]; + r->xy[ 9] = y[1]; + r->xy[ 8] = y[0]; + + r->xy[ 7] = x[7]; + r->xy[ 6] = x[6]; + r->xy[ 5] = x[5]; + r->xy[ 4] = x[4]; + r->xy[ 3] = x[3]; + r->xy[ 2] = x[2]; + r->xy[ 1] = x[1]; + r->xy[ 0] = x[0]; + + + // do the double of the double (i.e. "triple") too, just in case we need it in the main loop: + + point_double (t4, x1, t1); + + // convert to affine coordinates and store it: + + inv_mod (t1); + + mul_mod (t2, t1, t1); // t2 = t1^2 + mul_mod (t3, t1, t2); // t3 = t1^3 + + // output to y1 + + mul_mod (t3, t3, x1); + + r->xy[47] = t3[7]; + r->xy[46] = t3[6]; + r->xy[45] = t3[5]; + r->xy[44] = t3[4]; + r->xy[43] = t3[3]; + r->xy[42] = t3[2]; + r->xy[41] = t3[1]; + r->xy[40] = t3[0]; + + // output to x1 + + mul_mod (t3, t2, t4); + + r->xy[39] = t3[7]; + r->xy[38] = t3[6]; + r->xy[37] = t3[5]; + r->xy[36] = t3[4]; + r->xy[35] = t3[3]; + r->xy[34] = t3[2]; + r->xy[33] = t3[1]; + r->xy[32] = t3[0]; +} + +DECLSPEC void point_mul (u32 r[9], const u32 k[8], GLOBAL_AS const secp256k1_t *tmps) +{ + // first check the position of the least significant bit + + // the following fancy shift operation just checks the last 2 bits, finds the + // least significant bit (set to 1) and updates idx according to this table: + // last bits | idx + // 0bxxxxxx00 | 2 + // 0bxxxxxx01 | 0 + // 0bxxxxxx10 | 1 + // 0bxxxxxx11 | 0 + + const u32 idx = (0x0102 >> ((k[0] & 3) << 2)) & 3; + + const u32 offset = idx << 4; // * (8 + 8) = 16 (=> offset of 16 u32 = 16 * 4 bytes) + + u32 x1[8]; + + x1[0] = tmps->xy[offset + 0]; + x1[1] = tmps->xy[offset + 1]; + x1[2] = tmps->xy[offset + 2]; + x1[3] = tmps->xy[offset + 3]; + x1[4] = tmps->xy[offset + 4]; + x1[5] = tmps->xy[offset + 5]; + x1[6] = tmps->xy[offset + 6]; + 
x1[7] = tmps->xy[offset + 7]; + + u32 y1[8]; + + y1[0] = tmps->xy[offset + 8]; + y1[1] = tmps->xy[offset + 9]; + y1[2] = tmps->xy[offset + 10]; + y1[3] = tmps->xy[offset + 11]; + y1[4] = tmps->xy[offset + 12]; + y1[5] = tmps->xy[offset + 13]; + y1[6] = tmps->xy[offset + 14]; + y1[7] = tmps->xy[offset + 15]; + + u32 z1[8] = { 0 }; + + z1[0] = 1; + + // do NOT allow to overflow the tmps->xy buffer: + + u32 final_offset = offset; + + if (final_offset > 16) final_offset = 16; + + u32 x2[8]; + + x2[0] = tmps->xy[final_offset + 16]; + x2[1] = tmps->xy[final_offset + 17]; + x2[2] = tmps->xy[final_offset + 18]; + x2[3] = tmps->xy[final_offset + 19]; + x2[4] = tmps->xy[final_offset + 20]; + x2[5] = tmps->xy[final_offset + 21]; + x2[6] = tmps->xy[final_offset + 22]; + x2[7] = tmps->xy[final_offset + 23]; + + u32 y2[8]; + + y2[0] = tmps->xy[final_offset + 24]; + y2[1] = tmps->xy[final_offset + 25]; + y2[2] = tmps->xy[final_offset + 26]; + y2[3] = tmps->xy[final_offset + 27]; + y2[4] = tmps->xy[final_offset + 28]; + y2[5] = tmps->xy[final_offset + 29]; + y2[6] = tmps->xy[final_offset + 30]; + y2[7] = tmps->xy[final_offset + 31]; + + u32 z2[8] = { 0 }; + + z2[0] = 1; + + // ... then find out the position of the most significant bit + + int loop_start = idx; + int loop_end = 255; + + for (int i = 255; i > 0; i--) // or use: i > idx + { + u32 idx = i >> 5; // the current u32 (each consisting of 2^5 = 32 bits) to inspect + + u32 mask = 1 << (i & 0x1f); + + if (k[idx] & mask) break; // found it ! 
+ + loop_end--; + } + + /* + * Start + */ + + // "just" double until we find the first add (where the first bit is set): + + for (int pos = loop_start; pos < loop_end; pos++) + { + const u32 idx = pos >> 5; + + const u32 mask = 1 << (pos & 0x1f); + + if (k[idx] & mask) break; + + point_double (x2, y2, z2); + + loop_start++; + } + + // for case 0 and 1 we can skip the double (we already did it in the host) + + if (idx > 1) + { + x1[0] = x2[0]; + x1[1] = x2[1]; + x1[2] = x2[2]; + x1[3] = x2[3]; + x1[4] = x2[4]; + x1[5] = x2[5]; + x1[6] = x2[6]; + x1[7] = x2[7]; + + y1[0] = y2[0]; + y1[1] = y2[1]; + y1[2] = y2[2]; + y1[3] = y2[3]; + y1[4] = y2[4]; + y1[5] = y2[5]; + y1[6] = y2[6]; + y1[7] = y2[7]; + + z1[0] = z2[0]; + z1[1] = z2[1]; + z1[2] = z2[2]; + z1[3] = z2[3]; + z1[4] = z2[4]; + z1[5] = z2[5]; + z1[6] = z2[6]; + z1[7] = z2[7]; + + point_double (x2, y2, z2); + } + + // main loop (right-to-left binary algorithm): + + for (int pos = loop_start + 1; pos < loop_end; pos++) + { + u32 idx = pos >> 5; + + u32 mask = 1 << (pos & 0x1f); + + // add only if needed: + + if (k[idx] & mask) + { + point_add (x1, y1, z1, x2, y2, z2); + } + + // always double: + + point_double (x2, y2, z2); + } + + // handle last one: + + //const u32 final_idx = loop_end >> 5; + //const u32 mask = 1 << (loop_end & 0x1f); + + //if (k[final_idx] & mask) + //{ + // here we just assume that we have at least 2 bits set (an initial one and one additional bit) + // this could be dangerous/wrong in some situations, but very, very, very unlikely + point_add (x1, y1, z1, x2, y2, z2); + //} + + /* + * Get the corresponding affine coordinates x/y: + * + * Note: + * x1_affine = x1_jacobian / z1^2 = x1_jacobian * z1_inv^2 + * y1_affine = y1_jacobian / z1^2 = y1_jacobian * z1_inv^2 + * + */ + + inv_mod (z1); + + // z2 is just used as temporary storage to keep the unmodified z1 for calculating z1^3: + + mul_mod (z2, z1, z1); // z1^2 + mul_mod (x1, x1, z2); // x1_affine + + mul_mod (z1, z2, z1); // z1^3 + mul_mod 
(y1, y1, z1); // y1_affine + + /* + * output: + */ + + // shift by 1 byte (8 bits) to make room and add the parity/sign (for odd/even y): + + r[8] = (x1[0] << 24); + r[7] = (x1[0] >> 8) | (x1[1] << 24); + r[6] = (x1[1] >> 8) | (x1[2] << 24); + r[5] = (x1[2] >> 8) | (x1[3] << 24); + r[4] = (x1[3] >> 8) | (x1[4] << 24); + r[3] = (x1[4] >> 8) | (x1[5] << 24); + r[2] = (x1[5] >> 8) | (x1[6] << 24); + r[1] = (x1[6] >> 8) | (x1[7] << 24); + r[0] = (x1[7] >> 8); + + const u32 type = 0x02 | (y1[0] & 1); // (note: 0b10 | 0b01 = 0x03) + + r[0] = r[0] | type << 24; // 0x02 or 0x03 +} + +DECLSPEC u32 parse_public (secp256k1_t *r, const u32 k[9]) +{ + // verify: + + const u32 first_byte = k[0] & 0xff; + + if ((first_byte != '\x02') && (first_byte != '\x03')) + { + return 1; + } + + // load k into x without the first byte: + + u32 x[8]; + + x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff); + x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff); + x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff); + x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff); + x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff); + x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff); + x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff); + x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff); + + u32 p[8]; + + p[0] = SECP256K1_P0; + p[1] = SECP256K1_P1; + p[2] = SECP256K1_P2; + p[3] = SECP256K1_P3; + p[4] = SECP256K1_P4; + p[5] = SECP256K1_P5; + p[6] = SECP256K1_P6; + p[7] = SECP256K1_P7; + + // x must be smaller than p (because of y ^ 2 = x ^ 3 % p) + + for (int i = 7; i >= 0; i--) + { + if (x[i] < p[i]) break; + if (x[i] > p[i]) return 1; + } + + + // get y^2 = x^3 + 7: + + u32 
b[8] = { 0 }; + + b[0] = SECP256K1_B; + + u32 y[8]; + + mul_mod (y, x, x); + mul_mod (y, y, x); + add_mod (y, y, b); + + // get y = sqrt (y^2): + + sqrt_mod (y); + + // check if it's of the correct parity that we want (odd/even): + + if ((first_byte & 1) != (y[0] & 1)) + { + // y2 = p - y1 (or y2 = y1 * -1) + + sub_mod (y, p, y); + } + + // get xy: + + point_get_coords (r, x, y); + + return 0; +} diff --git a/OpenCL/inc_ecc_secp256k1.h b/OpenCL/inc_ecc_secp256k1.h new file mode 100644 index 000000000..501235d4b --- /dev/null +++ b/OpenCL/inc_ecc_secp256k1.h @@ -0,0 +1,40 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_ECC_SECP256K1_H +#define _INC_ECC_SECP256K1_H + +// y^2 = x^3 + ax + b with a = 0 and b = 7 => y^2 = x^3 + 7: + +#define SECP256K1_B 7 + +#define SECP256K1_P0 0xfffffc2f +#define SECP256K1_P1 0xfffffffe +#define SECP256K1_P2 0xffffffff +#define SECP256K1_P3 0xffffffff +#define SECP256K1_P4 0xffffffff +#define SECP256K1_P5 0xffffffff +#define SECP256K1_P6 0xffffffff +#define SECP256K1_P7 0xffffffff + +#define SECP256K1_N0 0xd0364141 +#define SECP256K1_N1 0xbfd25e8c +#define SECP256K1_N2 0xaf48a03b +#define SECP256K1_N3 0xbaaedce6 +#define SECP256K1_N4 0xfffffffe +#define SECP256K1_N5 0xffffffff +#define SECP256K1_N6 0xffffffff +#define SECP256K1_N7 0xffffffff + +typedef struct secp256k1 +{ + u32 xy[48]; // all 3 pairs of 32+32 bytes: x,y, x1,y1, x2,y2 + +} secp256k1_t; + +DECLSPEC u32 parse_public (secp256k1_t *r, const u32 k[9]); +DECLSPEC void point_mul (u32 *r, const u32 k[8], GLOBAL_AS const secp256k1_t *tmps); + +#endif // _INC_ECC_SECP256K1_H diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl index d43d6bc10..b980e4674 100644 --- a/OpenCL/inc_zip_inflate.cl +++ b/OpenCL/inc_zip_inflate.cl @@ -209,6 +209,15 @@ DECLSPEC void *memset(u8 *s, int c, u32 len){ #define TINFL_MEMSET(p, c, l) memset(p, c, (u32)l) #define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) +// 
hashcat-patched/hashcat-specific: +#ifdef CRC32_IN_INFLATE +#define M_DICT_SIZE 1 +#define MAYBE_GLOBAL GLOBAL_AS +#else +#define M_DICT_SIZE TINFL_LZ_DICT_SIZE +#define MAYBE_GLOBAL +#endif + #define TINFL_CR_FINISH } #define TINFL_CR_BEGIN \ switch (r->m_state) \ @@ -411,14 +420,16 @@ typedef struct tinfl_decompressor m_decomp; mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; - mz_uint8 m_dict[1]; // hashcat-patched: we do not need m_dict because we have our own output buffer + // hashcat-patched: we do not need m_dict in case of CRC32 checksums, + // because we have our own output buffer: + mz_uint8 m_dict[M_DICT_SIZE]; tinfl_status m_last_status; } inflate_state; typedef struct mz_stream_s { - GLOBAL_AS const unsigned char *next_in; /* pointer to next byte to read */ + MAYBE_GLOBAL const unsigned char *next_in; /* pointer to next byte to read */ unsigned int avail_in; /* number of bytes available at next_in */ mz_ulong total_in; /* total number of bytes consumed so far */ @@ -457,9 +468,10 @@ DECLSPEC int mz_inflateEnd(mz_streamp pStream); DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state*); - +// hashcat-patched/hashcat-specific: DECLSPEC const mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream) { + #ifdef CRC32_IN_INFLATE mz_uint8 r = c; u32 key3; @@ -469,18 +481,21 @@ DECLSPEC const mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream) update_key012 (pStream->key0, pStream->key1, pStream->key2, plain, pStream->crc32tab); return (mz_uint8) plain; + #else + return c; + #endif } -DECLSPEC void memcpy_g(void *dest, GLOBAL_AS const void *src, size_t n, mz_streamp pStream){ - GLOBAL_AS char *csrc = (GLOBAL_AS char *)src; +DECLSPEC void memcpy_g(void *dest, MAYBE_GLOBAL const void *src, size_t n, mz_streamp pStream){ + MAYBE_GLOBAL char *csrc = (MAYBE_GLOBAL char *)src; char *cdest = (char *)dest; for (int i=0; iavail_out; status = tinfl_decompress(&pState->m_decomp, pStream->next_in, 
&in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags, pStream); + #ifdef CRC32_IN_INFLATE for (int i = 0; i < out_bytes; i++) { pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab); } + #endif pState->m_last_status = status; pStream->next_in += (mz_uint)in_bytes; @@ -1040,10 +1057,12 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush) n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + #ifdef CRC32_IN_INFLATE for (int i = 0; i < n; i++) { pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab); } + #endif //pStream->next_out += n; //pStream->avail_out -= n; @@ -1072,10 +1091,12 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush) n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + #ifdef CRC32_IN_INFLATE for (int i = 0; i < n; i++) { pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab); } + #endif //pStream->next_out += n; //pStream->avail_out -= n; @@ -1158,10 +1179,12 @@ DECLSPEC int hc_inflate (mz_streamp pStream) tinfl_status status = tinfl_decompress (&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out + pStream->total_out, &out_bytes, decomp_flags, pStream); + #ifdef CRC32_IN_INFLATE for (int i = 0; i < out_bytes; i++) { pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[pStream->total_out + i], pStream->crc32tab); } + #endif pStream->next_in += (mz_uint) in_bytes; pStream->avail_in -= (mz_uint) in_bytes; diff --git a/OpenCL/m21700-pure.cl b/OpenCL/m21700-pure.cl index 03b899701..45bd04ee0 100644 --- a/OpenCL/m21700-pure.cl +++ b/OpenCL/m21700-pure.cl @@ -13,12 +13,15 @@ #include "inc_simd.cl" #include "inc_hash_sha256.cl" #include "inc_hash_sha512.cl" +#include "inc_ecc_secp256k1.cl" #endif #define COMPARE_M "inc_comp_multi.cl" typedef struct electrum { + secp256k1_t 
coords; + u32 data_buf[4096]; u32 data_len; @@ -34,16 +37,6 @@ typedef struct electrum_tmp } electrum_tmp_t; -typedef struct -{ - u32 ukey[8]; - - u32 pubkey[9]; // 32 + 1 bytes (for sign of the curve point) - - u32 hook_success; - -} electrum_hook_t; - DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) { digest[0] = ipad[0]; @@ -102,7 +95,7 @@ DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); } -KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t)) +KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { /** * base @@ -199,7 +192,7 @@ KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum tmps[gid].out[7] = tmps[gid].dgst[7]; } -KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t)) +KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { const u64 gid = get_global_id (0); @@ -322,8 +315,12 @@ KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum unpack64v (tmps, out, gid, 7, out[7]); } -KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t)) +KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { + /** + * base + */ + const u64 gid = get_global_id (0); if (gid >= gid_max) return; @@ -339,27 +336,9 @@ KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electr out[6] = tmps[gid].out[6]; out[7] = tmps[gid].out[7]; - // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo): - // the modulus is the secp256k1 group order - /* - the general modulo by shift and substract code (a = a % b): - - x = b; - - t = a >> 1; - - while (x <= t) x <<= 1; - - while (a >= b) - { - 
if (a >= x) a -= x; - - x >>= 1; - } - - return a; // remainder - */ + * First calculate the modulo of the pbkdf2 hash with SECP256K1_N: + */ u32 a[16]; @@ -380,302 +359,43 @@ KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electr a[14] = h32_from_64_S (out[7]); a[15] = l32_from_64_S (out[7]); - u32 b[16]; - - b[ 0] = 0x00000000; - b[ 1] = 0x00000000; - b[ 2] = 0x00000000; - b[ 3] = 0x00000000; - b[ 4] = 0x00000000; - b[ 5] = 0x00000000; - b[ 6] = 0x00000000; - b[ 7] = 0x00000000; - b[ 8] = 0xffffffff; - b[ 9] = 0xffffffff; - b[10] = 0xffffffff; - b[11] = 0xfffffffe; - b[12] = 0xbaaedce6; - b[13] = 0xaf48a03b; - b[14] = 0xbfd25e8c; - b[15] = 0xd0364141; + mod_512 (a); - /* - * Start: - */ + // copy the last 256 bit (32 bytes) of modulo (a): - // x = b (but with a fast "shift" trick to avoid the while loop) - - u32 x[16]; - - x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the - x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1 - x[ 2] = b[10]; - x[ 3] = b[11]; - x[ 4] = b[12]; - x[ 5] = b[13]; - x[ 6] = b[14]; - x[ 7] = b[15]; - x[ 8] = 0x00000000; - x[ 9] = 0x00000000; - x[10] = 0x00000000; - x[11] = 0x00000000; - x[12] = 0x00000000; - x[13] = 0x00000000; - x[14] = 0x00000000; - x[15] = 0x00000000; - - // a >= b - - while (a[0] >= b[0]) - { - const u32 l1 = (a[ 0] < b[ 0]) << 0 - | (a[ 1] < b[ 1]) << 1 - | (a[ 2] < b[ 2]) << 2 - | (a[ 3] < b[ 3]) << 3 - | (a[ 4] < b[ 4]) << 4 - | (a[ 5] < b[ 5]) << 5 - | (a[ 6] < b[ 6]) << 6 - | (a[ 7] < b[ 7]) << 7 - | (a[ 8] < b[ 8]) << 8 - | (a[ 9] < b[ 9]) << 9 - | (a[10] < b[10]) << 10 - | (a[11] < b[11]) << 11 - | (a[12] < b[12]) << 12 - | (a[13] < b[13]) << 13 - | (a[14] < b[14]) << 14 - | (a[15] < b[15]) << 15; - - const u32 e1 = (a[ 0] == b[ 0]) << 0 - | (a[ 1] == b[ 1]) << 1 - | (a[ 2] == b[ 2]) << 2 - | (a[ 3] == b[ 3]) << 3 - | (a[ 4] == b[ 4]) << 4 - | (a[ 5] == b[ 5]) << 5 - | (a[ 6] == b[ 6]) << 6 - | (a[ 7] == b[ 
7]) << 7 - | (a[ 8] == b[ 8]) << 8 - | (a[ 9] == b[ 9]) << 9 - | (a[10] == b[10]) << 10 - | (a[11] == b[11]) << 11 - | (a[12] == b[12]) << 12 - | (a[13] == b[13]) << 13 - | (a[14] == b[14]) << 14 - | (a[15] == b[15]) << 15; - - if (l1) - { - if (l1 & 0x0001) break; - if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; - if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; - if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; - if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; - if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; - if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; - if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; - if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; - if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; - if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; - if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; - if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; - if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break; - if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break; - if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break; - } - - // r = x (copy it to have the original values for the subtraction) - - u32 r[16]; - - r[ 0] = x[ 0]; - r[ 1] = x[ 1]; - r[ 2] = x[ 2]; - r[ 3] = x[ 3]; - r[ 4] = x[ 4]; - r[ 5] = x[ 5]; - r[ 6] = x[ 6]; - r[ 7] = x[ 7]; - r[ 8] = x[ 8]; - r[ 9] = x[ 9]; - r[10] = x[10]; - r[11] = x[11]; - r[12] = x[12]; - r[13] = x[13]; - r[14] = x[14]; - r[15] = x[15]; - - // x >>= 1 - - x[15] = x[15] >> 1 | (x[14] & 1) << 31; - x[14] = x[14] >> 1 | (x[13] & 1) << 31; - x[13] = x[13] >> 1 | (x[12] & 1) << 31; - x[12] = x[12] >> 1 | (x[11] & 1) << 31; - x[11] = x[11] >> 1 | (x[10] & 1) << 31; - x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; - x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; - x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; - x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; - x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; - x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; - x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; - x[ 3] = x[ 3] >> 1 | 
(x[ 2] & 1) << 31; - x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; - x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; - x[ 0] = x[ 0] >> 1; - - // if (a >= r) a -= r; - - const u32 l2 = (a[ 0] < r[ 0]) << 0 - | (a[ 1] < r[ 1]) << 1 - | (a[ 2] < r[ 2]) << 2 - | (a[ 3] < r[ 3]) << 3 - | (a[ 4] < r[ 4]) << 4 - | (a[ 5] < r[ 5]) << 5 - | (a[ 6] < r[ 6]) << 6 - | (a[ 7] < r[ 7]) << 7 - | (a[ 8] < r[ 8]) << 8 - | (a[ 9] < r[ 9]) << 9 - | (a[10] < r[10]) << 10 - | (a[11] < r[11]) << 11 - | (a[12] < r[12]) << 12 - | (a[13] < r[13]) << 13 - | (a[14] < r[14]) << 14 - | (a[15] < r[15]) << 15; - - const u32 e2 = (a[ 0] == r[ 0]) << 0 - | (a[ 1] == r[ 1]) << 1 - | (a[ 2] == r[ 2]) << 2 - | (a[ 3] == r[ 3]) << 3 - | (a[ 4] == r[ 4]) << 4 - | (a[ 5] == r[ 5]) << 5 - | (a[ 6] == r[ 6]) << 6 - | (a[ 7] == r[ 7]) << 7 - | (a[ 8] == r[ 8]) << 8 - | (a[ 9] == r[ 9]) << 9 - | (a[10] == r[10]) << 10 - | (a[11] == r[11]) << 11 - | (a[12] == r[12]) << 12 - | (a[13] == r[13]) << 13 - | (a[14] == r[14]) << 14 - | (a[15] == r[15]) << 15; - - if (l2) - { - if (l2 & 0x0001) continue; - if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue; - if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; - if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; - if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; - if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; - if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; - if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; - if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; - if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; - if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; - if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; - if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue; - if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue; - if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue; - if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue; - } - - // substract (a -= r): - - r[ 0] = a[ 0] - r[ 0]; - r[ 1] = a[ 
1] - r[ 1]; - r[ 2] = a[ 2] - r[ 2]; - r[ 3] = a[ 3] - r[ 3]; - r[ 4] = a[ 4] - r[ 4]; - r[ 5] = a[ 5] - r[ 5]; - r[ 6] = a[ 6] - r[ 6]; - r[ 7] = a[ 7] - r[ 7]; - r[ 8] = a[ 8] - r[ 8]; - r[ 9] = a[ 9] - r[ 9]; - r[10] = a[10] - r[10]; - r[11] = a[11] - r[11]; - r[12] = a[12] - r[12]; - r[13] = a[13] - r[13]; - r[14] = a[14] - r[14]; - r[15] = a[15] - r[15]; - - // take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!) - - if (r[ 1] > a[ 1]) r[ 0]--; - if (r[ 2] > a[ 2]) r[ 1]--; - if (r[ 3] > a[ 3]) r[ 2]--; - if (r[ 4] > a[ 4]) r[ 3]--; - if (r[ 5] > a[ 5]) r[ 4]--; - if (r[ 6] > a[ 6]) r[ 5]--; - if (r[ 7] > a[ 7]) r[ 6]--; - if (r[ 8] > a[ 8]) r[ 7]--; - if (r[ 9] > a[ 9]) r[ 8]--; - if (r[10] > a[10]) r[ 9]--; - if (r[11] > a[11]) r[10]--; - if (r[12] > a[12]) r[11]--; - if (r[13] > a[13]) r[12]--; - if (r[14] > a[14]) r[13]--; - if (r[15] > a[15]) r[14]--; - - a[ 0] = r[ 0]; - a[ 1] = r[ 1]; - a[ 2] = r[ 2]; - a[ 3] = r[ 3]; - a[ 4] = r[ 4]; - a[ 5] = r[ 5]; - a[ 6] = r[ 6]; - a[ 7] = r[ 7]; - a[ 8] = r[ 8]; - a[ 9] = r[ 9]; - a[10] = r[10]; - a[11] = r[11]; - a[12] = r[12]; - a[13] = r[13]; - a[14] = r[14]; - a[15] = r[15]; - } + u32 tweak[8]; - /** - * copy the last 256 bit (32 bytes) of modulo (a) to the hook buffer - */ + tweak[0] = a[15]; + tweak[1] = a[14]; + tweak[2] = a[13]; + tweak[3] = a[12]; + tweak[4] = a[11]; + tweak[5] = a[10]; + tweak[6] = a[ 9]; + tweak[7] = a[ 8]; - hooks[gid].ukey[0] = hc_swap32_S (a[ 8]); - hooks[gid].ukey[1] = hc_swap32_S (a[ 9]); - hooks[gid].ukey[2] = hc_swap32_S (a[10]); - hooks[gid].ukey[3] = hc_swap32_S (a[11]); - hooks[gid].ukey[4] = hc_swap32_S (a[12]); - hooks[gid].ukey[5] = hc_swap32_S (a[13]); - hooks[gid].ukey[6] = hc_swap32_S (a[14]); - hooks[gid].ukey[7] = hc_swap32_S (a[15]); -} -KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t)) -{ - /** - * base + /* + * the main secp256k1 point multiplication by a scalar/tweak: */ 
- const u64 gid = get_global_id (0); + GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords; - if (gid >= gid_max) return; + u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more) - if (hooks[gid].hook_success == 0) return; + point_mul (pubkey, tweak, coords); - u32 pubkey[64] = { 0 }; - pubkey[0] = hooks[gid].pubkey[0]; - pubkey[1] = hooks[gid].pubkey[1]; - pubkey[2] = hooks[gid].pubkey[2]; - pubkey[3] = hooks[gid].pubkey[3]; - pubkey[4] = hooks[gid].pubkey[4]; - pubkey[5] = hooks[gid].pubkey[5]; - pubkey[6] = hooks[gid].pubkey[6]; - pubkey[7] = hooks[gid].pubkey[7]; - pubkey[8] = hooks[gid].pubkey[8]; + /* + * sha512 () of the pubkey: + */ sha512_ctx_t sha512_ctx; - sha512_init (&sha512_ctx); - sha512_update_swap (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign - sha512_final (&sha512_ctx); + sha512_init (&sha512_ctx); + sha512_update (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign + sha512_final (&sha512_ctx); + /* * sha256-hmac () of the data_buf diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl index 3658721e2..ccdf822aa 100644 --- a/OpenCL/m21800-pure.cl +++ b/OpenCL/m21800-pure.cl @@ -12,8 +12,19 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha512.cl" +#include "inc_ecc_secp256k1.cl" +#include "inc_cipher_aes.cl" +#include "inc_zip_inflate.cl" #endif +typedef struct electrum +{ + secp256k1_t coords; + + u32 data_buf[256]; + +} electrum_t; + typedef struct electrum_tmp { u64 ipad[8]; @@ -24,14 +35,6 @@ typedef struct electrum_tmp } electrum_tmp_t; -typedef struct -{ - u32 ukey[8]; - - u32 hook_success; - -} electrum_hook_t; - DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) { digest[0] = ipad[0]; @@ -90,7 +93,7 @@ DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w 
sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); } -KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t)) +KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { /** * base @@ -187,7 +190,7 @@ KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_ tmps[gid].out[7] = tmps[gid].dgst[7]; } -KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t)) +KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { const u64 gid = get_global_id (0); @@ -310,12 +313,70 @@ KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_ unpack64v (tmps, out, gid, 7, out[7]); } -KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t)) +KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) { - const u64 gid = get_global_id (0); + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + 
+ #endif if (gid >= gid_max) return; + + /* + * Start by copying/aligning the data + */ + u64 out[8]; out[0] = tmps[gid].out[0]; @@ -327,27 +388,9 @@ KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hoo out[6] = tmps[gid].out[6]; out[7] = tmps[gid].out[7]; - // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo): - // the modulus is the secp256k1 group order - /* - the general modulo by shift and substract code (a = a % b): - - x = b; - - t = a >> 1; - - while (x <= t) x <<= 1; - - while (a >= b) - { - if (a >= x) a -= x; - - x >>= 1; - } - - return a; // remainder - */ + * First calculate the modulo of the pbkdf2 hash with SECP256K1_N: + */ u32 a[16]; @@ -368,284 +411,199 @@ KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hoo a[14] = h32_from_64_S (out[7]); a[15] = l32_from_64_S (out[7]); - u32 b[16]; - - b[ 0] = 0x00000000; - b[ 1] = 0x00000000; - b[ 2] = 0x00000000; - b[ 3] = 0x00000000; - b[ 4] = 0x00000000; - b[ 5] = 0x00000000; - b[ 6] = 0x00000000; - b[ 7] = 0x00000000; - b[ 8] = 0xffffffff; - b[ 9] = 0xffffffff; - b[10] = 0xffffffff; - b[11] = 0xfffffffe; - b[12] = 0xbaaedce6; - b[13] = 0xaf48a03b; - b[14] = 0xbfd25e8c; - b[15] = 0xd0364141; + mod_512 (a); + + // copy the last 256 bit (32 bytes) of modulo (a): + + u32 tweak[8]; + + tweak[0] = a[15]; + tweak[1] = a[14]; + tweak[2] = a[13]; + tweak[3] = a[12]; + tweak[4] = a[11]; + tweak[5] = a[10]; + tweak[6] = a[ 9]; + tweak[7] = a[ 8]; + /* - * Start: + * the main secp256k1 point multiplication by a scalar/tweak: */ - // x = b (but with a fast "shift" trick to avoid the while loop) - - u32 x[16]; - - x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the - x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1 - x[ 2] = b[10]; - x[ 3] = b[11]; - x[ 4] = b[12]; - x[ 5] = b[13]; - x[ 6] = b[14]; - x[ 7] = b[15]; - x[ 8] = 0x00000000; - x[ 9] = 0x00000000; - 
x[10] = 0x00000000; - x[11] = 0x00000000; - x[12] = 0x00000000; - x[13] = 0x00000000; - x[14] = 0x00000000; - x[15] = 0x00000000; - - // a >= b - - while (a[0] >= b[0]) - { - const u32 l1 = (a[ 0] < b[ 0]) << 0 - | (a[ 1] < b[ 1]) << 1 - | (a[ 2] < b[ 2]) << 2 - | (a[ 3] < b[ 3]) << 3 - | (a[ 4] < b[ 4]) << 4 - | (a[ 5] < b[ 5]) << 5 - | (a[ 6] < b[ 6]) << 6 - | (a[ 7] < b[ 7]) << 7 - | (a[ 8] < b[ 8]) << 8 - | (a[ 9] < b[ 9]) << 9 - | (a[10] < b[10]) << 10 - | (a[11] < b[11]) << 11 - | (a[12] < b[12]) << 12 - | (a[13] < b[13]) << 13 - | (a[14] < b[14]) << 14 - | (a[15] < b[15]) << 15; - - const u32 e1 = (a[ 0] == b[ 0]) << 0 - | (a[ 1] == b[ 1]) << 1 - | (a[ 2] == b[ 2]) << 2 - | (a[ 3] == b[ 3]) << 3 - | (a[ 4] == b[ 4]) << 4 - | (a[ 5] == b[ 5]) << 5 - | (a[ 6] == b[ 6]) << 6 - | (a[ 7] == b[ 7]) << 7 - | (a[ 8] == b[ 8]) << 8 - | (a[ 9] == b[ 9]) << 9 - | (a[10] == b[10]) << 10 - | (a[11] == b[11]) << 11 - | (a[12] == b[12]) << 12 - | (a[13] == b[13]) << 13 - | (a[14] == b[14]) << 14 - | (a[15] == b[15]) << 15; - - if (l1) - { - if (l1 & 0x0001) break; - if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break; - if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break; - if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break; - if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break; - if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break; - if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break; - if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break; - if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break; - if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break; - if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break; - if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break; - if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break; - if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break; - if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break; - if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break; - } + GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords; - 
// r = x (copy it to have the original values for the subtraction) - - u32 r[16]; - - r[ 0] = x[ 0]; - r[ 1] = x[ 1]; - r[ 2] = x[ 2]; - r[ 3] = x[ 3]; - r[ 4] = x[ 4]; - r[ 5] = x[ 5]; - r[ 6] = x[ 6]; - r[ 7] = x[ 7]; - r[ 8] = x[ 8]; - r[ 9] = x[ 9]; - r[10] = x[10]; - r[11] = x[11]; - r[12] = x[12]; - r[13] = x[13]; - r[14] = x[14]; - r[15] = x[15]; - - // x >>= 1 - - x[15] = x[15] >> 1 | (x[14] & 1) << 31; - x[14] = x[14] >> 1 | (x[13] & 1) << 31; - x[13] = x[13] >> 1 | (x[12] & 1) << 31; - x[12] = x[12] >> 1 | (x[11] & 1) << 31; - x[11] = x[11] >> 1 | (x[10] & 1) << 31; - x[10] = x[10] >> 1 | (x[ 9] & 1) << 31; - x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31; - x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31; - x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31; - x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31; - x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31; - x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31; - x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31; - x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31; - x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31; - x[ 0] = x[ 0] >> 1; - - // if (a >= r) a -= r; - - const u32 l2 = (a[ 0] < r[ 0]) << 0 - | (a[ 1] < r[ 1]) << 1 - | (a[ 2] < r[ 2]) << 2 - | (a[ 3] < r[ 3]) << 3 - | (a[ 4] < r[ 4]) << 4 - | (a[ 5] < r[ 5]) << 5 - | (a[ 6] < r[ 6]) << 6 - | (a[ 7] < r[ 7]) << 7 - | (a[ 8] < r[ 8]) << 8 - | (a[ 9] < r[ 9]) << 9 - | (a[10] < r[10]) << 10 - | (a[11] < r[11]) << 11 - | (a[12] < r[12]) << 12 - | (a[13] < r[13]) << 13 - | (a[14] < r[14]) << 14 - | (a[15] < r[15]) << 15; - - const u32 e2 = (a[ 0] == r[ 0]) << 0 - | (a[ 1] == r[ 1]) << 1 - | (a[ 2] == r[ 2]) << 2 - | (a[ 3] == r[ 3]) << 3 - | (a[ 4] == r[ 4]) << 4 - | (a[ 5] == r[ 5]) << 5 - | (a[ 6] == r[ 6]) << 6 - | (a[ 7] == r[ 7]) << 7 - | (a[ 8] == r[ 8]) << 8 - | (a[ 9] == r[ 9]) << 9 - | (a[10] == r[10]) << 10 - | (a[11] == r[11]) << 11 - | (a[12] == r[12]) << 12 - | (a[13] == r[13]) << 13 - | (a[14] == r[14]) << 14 - | (a[15] == r[15]) << 15; - - if (l2) - { - if (l2 & 0x0001) continue; - if (l2 & 0x0002) if ((e2 & 0x0001) == 
0x0001) continue; - if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue; - if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue; - if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue; - if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue; - if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue; - if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue; - if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue; - if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue; - if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue; - if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue; - if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue; - if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue; - if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue; - if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue; - } + u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more) - // substract (a -= r): - - r[ 0] = a[ 0] - r[ 0]; - r[ 1] = a[ 1] - r[ 1]; - r[ 2] = a[ 2] - r[ 2]; - r[ 3] = a[ 3] - r[ 3]; - r[ 4] = a[ 4] - r[ 4]; - r[ 5] = a[ 5] - r[ 5]; - r[ 6] = a[ 6] - r[ 6]; - r[ 7] = a[ 7] - r[ 7]; - r[ 8] = a[ 8] - r[ 8]; - r[ 9] = a[ 9] - r[ 9]; - r[10] = a[10] - r[10]; - r[11] = a[11] - r[11]; - r[12] = a[12] - r[12]; - r[13] = a[13] - r[13]; - r[14] = a[14] - r[14]; - r[15] = a[15] - r[15]; - - // take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!) 
- - if (r[ 1] > a[ 1]) r[ 0]--; - if (r[ 2] > a[ 2]) r[ 1]--; - if (r[ 3] > a[ 3]) r[ 2]--; - if (r[ 4] > a[ 4]) r[ 3]--; - if (r[ 5] > a[ 5]) r[ 4]--; - if (r[ 6] > a[ 6]) r[ 5]--; - if (r[ 7] > a[ 7]) r[ 6]--; - if (r[ 8] > a[ 8]) r[ 7]--; - if (r[ 9] > a[ 9]) r[ 8]--; - if (r[10] > a[10]) r[ 9]--; - if (r[11] > a[11]) r[10]--; - if (r[12] > a[12]) r[11]--; - if (r[13] > a[13]) r[12]--; - if (r[14] > a[14]) r[13]--; - if (r[15] > a[15]) r[14]--; - - a[ 0] = r[ 0]; - a[ 1] = r[ 1]; - a[ 2] = r[ 2]; - a[ 3] = r[ 3]; - a[ 4] = r[ 4]; - a[ 5] = r[ 5]; - a[ 6] = r[ 6]; - a[ 7] = r[ 7]; - a[ 8] = r[ 8]; - a[ 9] = r[ 9]; - a[10] = r[10]; - a[11] = r[11]; - a[12] = r[12]; - a[13] = r[13]; - a[14] = r[14]; - a[15] = r[15]; - } + point_mul (pubkey, tweak, coords); - /** - * copy the last 256 bit (32 bytes) of modulo (a) to the hook buffer + + /* + * sha512 () of the pubkey: */ - hooks[gid].ukey[0] = hc_swap32_S (a[ 8]); - hooks[gid].ukey[1] = hc_swap32_S (a[ 9]); - hooks[gid].ukey[2] = hc_swap32_S (a[10]); - hooks[gid].ukey[3] = hc_swap32_S (a[11]); - hooks[gid].ukey[4] = hc_swap32_S (a[12]); - hooks[gid].ukey[5] = hc_swap32_S (a[13]); - hooks[gid].ukey[6] = hc_swap32_S (a[14]); - hooks[gid].ukey[7] = hc_swap32_S (a[15]); -} + sha512_ctx_t sha512_ctx; -KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t)) -{ - /** - * base + sha512_init (&sha512_ctx); + sha512_update (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign + sha512_final (&sha512_ctx); + + // ... 
now we have the result in sha512_ctx.h[0]...sha512_ctx.h[7] + + u32 iv[4]; + + iv[0] = h32_from_64_S (sha512_ctx.h[0]); + iv[1] = l32_from_64_S (sha512_ctx.h[0]); + iv[2] = h32_from_64_S (sha512_ctx.h[1]); + iv[3] = l32_from_64_S (sha512_ctx.h[1]); + + iv[0] = hc_swap32_S (iv[0]); + iv[1] = hc_swap32_S (iv[1]); + iv[2] = hc_swap32_S (iv[2]); + iv[3] = hc_swap32_S (iv[3]); + + u32 key[4]; + + key[0] = h32_from_64_S (sha512_ctx.h[2]); + key[1] = l32_from_64_S (sha512_ctx.h[2]); + key[2] = h32_from_64_S (sha512_ctx.h[3]); + key[3] = l32_from_64_S (sha512_ctx.h[3]); + + key[0] = hc_swap32_S (key[0]); + key[1] = hc_swap32_S (key[1]); + key[2] = hc_swap32_S (key[2]); + key[3] = hc_swap32_S (key[3]); + + + /* + * AES decrypt the data_buf */ - const u64 gid = get_global_id (0); + // init AES - if (gid >= gid_max) return; + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // #define AES_LEN 1024 + // in my tests it also worked with only 128 input bytes ! 
+ #define AES_LEN 128 + #define AES_LEN_DIV_4 32 + + u32 buf_full[AES_LEN_DIV_4]; + + // we need to run it at least once: + + GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[digests_offset].data_buf; + + u32 data[4]; + + data[0] = data_buf[0]; + data[1] = data_buf[1]; + data[2] = data_buf[2]; + data[3] = data_buf[3]; + + u32 buf[4]; + + aes128_decrypt (ks, data, buf, s_td0, s_td1, s_td2, s_td3, s_td4); + + buf[0] ^= iv[0]; + + // early reject + + if ((buf[0] & 0x0007ffff) != 0x00059c78) return; + + buf[1] ^= iv[1]; + buf[2] ^= iv[2]; + buf[3] ^= iv[3]; + + buf_full[0] = buf[0]; + buf_full[1] = buf[1]; + buf_full[2] = buf[2]; + buf_full[3] = buf[3]; + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + + // for AES_LEN > 16 we need to loop + + for (int i = 16, j = 4; i < AES_LEN; i += 16, j += 4) + { + data[0] = data_buf[j + 0]; + data[1] = data_buf[j + 1]; + data[2] = data_buf[j + 2]; + data[3] = data_buf[j + 3]; + + aes128_decrypt (ks, data, buf, s_td0, s_td1, s_td2, s_td3, s_td4); + + buf[0] ^= iv[0]; + buf[1] ^= iv[1]; + buf[2] ^= iv[2]; + buf[3] ^= iv[3]; + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + + buf_full[j + 0] = buf[0]; + buf_full[j + 1] = buf[1]; + buf_full[j + 2] = buf[2]; + buf_full[j + 3] = buf[3]; + } + + + /* + * zlib inflate/decompress: + */ + + mz_stream infstream; + + infstream.opaque = Z_NULL; + + // input: + + infstream.avail_in = AES_LEN; + infstream.next_in = (u8 *) buf_full; + + // output: + + #define OUT_SIZE 16 + + u8 tmp[OUT_SIZE]; + + infstream.avail_out = OUT_SIZE; + infstream.next_out = tmp; + + + // decompress it: + + inflate_state pStream; + + mz_inflateInit2 (&infstream, MAX_WBITS, &pStream); + + const int zlib_ret = inflate (&infstream, Z_NO_FLUSH); + + if ((zlib_ret != MZ_OK) && (zlib_ret != MZ_STREAM_END)) + { + return; + } + + + /* + * Verify if decompressed data is either: + * - "{\n \"" or + * - "{\r\n \"" + */ - if (hooks[gid].hook_success == 1) + if 
(((tmp[0] == 0x7b) && (tmp[1] == 0x0a) && (tmp[2] == 0x20) && (tmp[3] == 0x20) && + (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x22)) || + ((tmp[0] == 0x7b) && (tmp[1] == 0x0d) && (tmp[2] == 0x0a) && (tmp[3] == 0x20) && + (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x20) && (tmp[7] == 0x22))) { if (atomic_inc (&hashes_shown[digests_offset]) == 0) { diff --git a/docs/credits.txt b/docs/credits.txt index 71ad760fa..403a6f261 100644 --- a/docs/credits.txt +++ b/docs/credits.txt @@ -56,7 +56,7 @@ Other contributors to hashcat * LZMA-SDK by Igor Pavlov * zlib by Jean-loup Gailly and Mark Adler * win-iconv by Yukihiro Nakadaira -* secp256k1 library by Pieter Wuille +* micro-ecc by Ken MacKay (used as reference for some secp256k1 operations) # Furthermore the following persons helped the project: diff --git a/include/emu_inc_ecc_secp256k1.h b/include/emu_inc_ecc_secp256k1.h new file mode 100644 index 000000000..a411dafd1 --- /dev/null +++ b/include/emu_inc_ecc_secp256k1.h @@ -0,0 +1,14 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _EMU_INC_ECC_SECP256K1_H +#define _EMU_INC_ECC_SECP256K1_H + +#include "emu_general.h" + +#include "inc_vendor.h" +#include "inc_ecc_secp256k1.h" + +#endif // _EMU_INC_ECC_SECP256K1_H diff --git a/include/ext_secp256k1.h b/include/ext_secp256k1.h deleted file mode 100644 index 689a75300..000000000 --- a/include/ext_secp256k1.h +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#ifndef _EXT_SECP256K1_H - -#include "secp256k1.h" - -bool hc_secp256k1_pubkey_parse (secp256k1_pubkey *pubkey, u8 *buf, size_t length); -bool hc_secp256k1_pubkey_tweak_mul (secp256k1_pubkey *pubkey, u8 *buf, size_t length); - -#endif // _EXT_SECP256K1_H diff --git a/src/Makefile b/src/Makefile index 6520f928c..a922a2362 100644 --- a/src/Makefile +++ b/src/Makefile @@ -10,13 +10,9 @@ PRODUCTION_VERSION := v5.1.0 ENABLE_BRAIN := 1 USE_SYSTEM_LZMA := 0 USE_SYSTEM_ZLIB 
:= 0 -USE_SYSTEM_LIBSECP256K1 := 0 USE_SYSTEM_OPENCL := 0 USE_SYSTEM_XXHASH := 0 -# NOTE: USE_SYSTEM_LIBSECP256K1 set to 1 can come with a huge performance hit for Electrum 4-5 -# this is due to the public API (secp256k1.h) not exposing all the faster ECC operations we need - ## ## Detect Operating System ## @@ -124,12 +120,6 @@ else DEPS_ZLIB_PATH := $(LIBRARY_DEV_ROOT_FOLDER)/ endif -ifeq ($(USE_SYSTEM_LIBSECP256K1),0) -DEPS_LIBSECP256K1_PATH := deps/secp256k1/ -else -DEPS_LIBSECP256K1_PATH := $(LIBRARY_DEV_ROOT_FOLDER)/ -endif - ifeq ($(USE_SYSTEM_OPENCL),0) DEPS_OPENCL_PATH := deps/OpenCL-Headers else @@ -195,11 +185,6 @@ CFLAGS_ZLIB += -Wno-unused-parameter CFLAGS_ZLIB += -DIOAPI_NO_64 endif -## because LIBSECP256K1 (Electrum 4/5) -CFLAGS_LIBSECP256K1 += -Wno-unused-parameter -CFLAGS_LIBSECP256K1 += -Wno-unused-function -CFLAGS_LIBSECP256K1 += -Wno-nonnull-compare - ifeq ($(DEBUG),0) CFLAGS += -O2 ifneq ($(UNAME),Darwin) @@ -238,24 +223,6 @@ ifeq ($(USE_SYSTEM_ZLIB),1) LFLAGS += -lz endif -# LIBSECP256K1 - -ifeq ($(USE_SYSTEM_LIBSECP256K1),1) -LFLAGS += -lsecp256k1 -CFLAGS_LIBSECP256K1 += -DWITH_LIBSECP256K1 - -# NOT working if used only in CFLAGS_LIBSECP256K1 because we need to include secp256k1.h in the module too -CFLAGS += -I$(DEPS_LIBSECP256K1_PATH) -else -CFLAGS_LIBSECP256K1 += -I$(DEPS_LIBSECP256K1_PATH)/src/ - -# files in deps/secp256k1/ include "include/secp256k1.h" so we need the parent folder too -CFLAGS_LIBSECP256K1 += -I$(DEPS_LIBSECP256K1_PATH) - -# NOT working if used only in CFLAGS_LIBSECP256K1 because we need to include secp256k1.h in the module too -CFLAGS += -I$(DEPS_LIBSECP256K1_PATH)/include/ -endif - # OpenCL CFLAGS += -I$(DEPS_OPENCL_PATH) @@ -333,10 +300,10 @@ endif # MSYS2 EMU_OBJS_ALL := emu_general emu_inc_common emu_inc_platform emu_inc_scalar emu_inc_simd EMU_OBJS_ALL += emu_inc_rp emu_inc_rp_optimized EMU_OBJS_ALL += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu_inc_truecrypt_xts emu_inc_veracrypt_xts -EMU_OBJS_ALL += 
emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 +EMU_OBJS_ALL += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1 EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish -OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma ext_secp256k1 filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL) +OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL) ifeq ($(ENABLE_BRAIN),1) OBJS_ALL += brain @@ -517,9 +484,6 @@ obj/%.NATIVE.o: $(DEPS_ZLIB_PATH)/%.c $(CC) -c $(CFLAGS_NATIVE) $(CFLAGS_ZLIB) $< -o $@ -fpic endif -obj/ext_secp256k1.NATIVE.o: src/ext_secp256k1.c - $(CC) -c $(CFLAGS_NATIVE) $(CFLAGS_LIBSECP256K1) $< -o $@ -fpic - ifeq ($(USE_SYSTEM_XXHASH),0) ifeq ($(ENABLE_BRAIN),1) obj/%.NATIVE.o: 
$(DEPS_XXHASH_PATH)/%.c @@ -682,12 +646,6 @@ obj/%.WIN.o: $(DEPS_XXHASH_PATH)/%.c endif endif -obj/ext_secp256k1.LINUX.o: src/ext_secp256k1.c - $(CC_LINUX) $(CFLAGS_CROSS_LINUX) $(CFLAGS_LIBSECP256K1) -c -o $@ $< - -obj/ext_secp256k1.WIN.o: src/ext_secp256k1.c - $(CC_WIN) $(CFLAGS_CROSS_WIN) $(CFLAGS_LIBSECP256K1) -c -o $@ $< - obj/combined.LINUX.a: $(LINUX_OBJS) $(AR_LINUX) rcs $@ $^ diff --git a/src/emu_inc_ecc_secp256k1.c b/src/emu_inc_ecc_secp256k1.c new file mode 100644 index 000000000..934824312 --- /dev/null +++ b/src/emu_inc_ecc_secp256k1.c @@ -0,0 +1,13 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "types.h" +#include "common.h" +#include "emu_general.h" + +#include "inc_vendor.h" +#include "inc_platform.h" +#include "inc_ecc_secp256k1.cl" + diff --git a/src/ext_secp256k1.c b/src/ext_secp256k1.c deleted file mode 100644 index ad081af54..000000000 --- a/src/ext_secp256k1.c +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#include "types.h" -#include "common.h" - -#include "ext_secp256k1.h" - - -#if !defined (WITH_LIBSECP256K1) - -// some macros needed for secp256k1 header and source code includes: - -// is this a good 64-bit support check ? 
-#if !defined (__LP64__) && !defined (_WIN64) && !defined (__x86_64__) - -#define USE_SCALAR_8X32 -#define USE_FIELD_10X26 - -#else - -#define HAVE___INT128 -#define USE_ASM_X86_64 -// doesn't change speed much: #define USE_ECMULT_STATIC_PRECOMPUTATION - -#define USE_SCALAR_4X64 -#define USE_FIELD_5X52 - -#endif - -#define USE_SCALAR_INV_BUILTIN -#define USE_FIELD_INV_BUILTIN - -#define ECMULT_WINDOW_SIZE 15 -#define ECMULT_GEN_PREC_BITS 4 - -#define USE_NUM_NONE - -#include "secp256k1.c" - -#endif - -bool hc_secp256k1_pubkey_parse (secp256k1_pubkey *pubkey, u8 *buf, size_t length) -{ - secp256k1_context *t_ctx = secp256k1_context_create (SECP256K1_CONTEXT_NONE); - - if (secp256k1_ec_pubkey_parse (t_ctx, pubkey, buf, length) == 0) - { - secp256k1_context_destroy (t_ctx); - - return false; - } - - secp256k1_context_destroy (t_ctx); - - return true; -} - -bool hc_secp256k1_pubkey_tweak_mul (secp256k1_pubkey *pubkey, u8 *buf, size_t length) -{ - #if !defined (WITH_LIBSECP256K1) - - secp256k1_context *sctx = secp256k1_context_create (SECP256K1_CONTEXT_NONE); - - secp256k1_gej res; - secp256k1_ge pt; - - // load the public key and 32 byte scalar: - - secp256k1_pubkey_load (sctx, &pt, pubkey); - - int overflow = 0; - - secp256k1_scalar s; - - secp256k1_scalar_set_b32 (&s, buf, &overflow); - - if (overflow != 0) - { - secp256k1_scalar_clear (&s); - - secp256k1_context_destroy (sctx); - - return false; - } - - if (secp256k1_scalar_is_zero (&s)) - { - secp256k1_scalar_clear (&s); - - secp256k1_context_destroy (sctx); - - return false; - } - - - // main multiply operation: - - const size_t scalar_size = (length - 1) * 8; - - secp256k1_ecmult_const (&res, &pt, &s, scalar_size); - secp256k1_ge_set_gej (&pt, &res); - secp256k1_fe_normalize (&pt.x); - secp256k1_fe_normalize (&pt.y); - - - // output: - - buf[0] = 0x02 | secp256k1_fe_is_odd (&pt.y); - - secp256k1_fe_get_b32 (buf + 1, &pt.x); - - - // cleanup: - - secp256k1_scalar_clear (&s); - - secp256k1_context_destroy (sctx); - 
- #else - - // ATTENTION: this way to multiply was much slower in our tests - - secp256k1_context *sctx = secp256k1_context_create (SECP256K1_CONTEXT_VERIFY); - - - // main multiply operation: - - if (secp256k1_ec_pubkey_tweak_mul (sctx, pubkey, buf) == 0) - { - secp256k1_context_destroy (sctx); - - return false; - } - - - // output: - - secp256k1_ec_pubkey_serialize (sctx, buf, &length, pubkey, SECP256K1_EC_COMPRESSED); - - - // cleanup: - - secp256k1_context_destroy (sctx); - - #endif - - return true; -} diff --git a/src/modules/module_21700.c b/src/modules/module_21700.c index c6fa73ecd..49155aabe 100644 --- a/src/modules/module_21700.c +++ b/src/modules/module_21700.c @@ -10,7 +10,7 @@ #include "convert.h" #include "shared.h" #include "memory.h" -#include "ext_secp256k1.h" +#include "emu_inc_ecc_secp256k1.h" static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; static const u32 DGST_POS0 = 0; @@ -24,8 +24,7 @@ static const u64 KERN_TYPE = 21700; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE - | OPTS_TYPE_HOOK23; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = 
"$electrum$4*03eae309d8bda5dcbddaae8145469193152763894b7260a6c4ba181b3ac2ed5653*8c594086a64dc87a9c1f8a69f646e31e8d3182c3c722def4427aa20684776ac26092c6f60bf2762e27adfa93fe1e952dcb8d6362224b9a371953aa3a2edb596ce5eb4c0879c4353f2cc515ec6c9e7a6defa26c5df346d18a62e9d40fcc606bc8c34322bf2212f77770a683788db0baf4cb43595c2a27fe5ff8bdcb1fd915bcd725149d8ee8f14c71635fecb04da5dde97584f4581ceb7d907dceed80ae5daa8352dda20b25fd6001e99a96b7cf839a36cd3f5656304e6998c18e03dd2fb720cb41386c52910c9cb83272c3d50f3a6ff362ab8389b0c21c75133c971df0a75b331796371b060b32fe1673f4a041d7ae08bbdeffb45d706eaf65f99573c07972701c97766b4d7a8a03bba0f885eb3845dfd9152286e1de1f93e25ce04c54712509166dda80a84c2d34652f68e6c01e662f8b1cc7c15103a4502c29332a4fdbdda470c875809e15aab3f2fcb061ee96992ad7e8ab9da88203e35f47d6e88b07a13b0e70ef76de3be20dc06facbddc1e47206b16b44573f57396265116b4d243e77d1c98bc2b28aa3ec0f8d959764a54ecdd03d8360ff2823577fe2183e618aac15b30c1d20986841e3d83c0bfabcedb7c27ddc436eb7113db927e0beae7522b04566631a090b214660152a4f4a90e19356e66ee7309a0671b2e7bfde82667538d193fc7e397442052c6c611b6bf0a04f629a1dc7fa9eb44bfad1bfc6a0bce9f0564c3b483737e447720b7fd038c9a961a25e9594b76bf8c8071c83fcacd689c7469f698ee4aee4d4f626a73e21ce4967e705e4d83e1145b4260330367d8341c84723a1b02567ffbab26aac3afd1079887b4391f05d09780fc65f8b4f68cd51391c06593919d7eafd0775f83045b8f5c2e59cef902ff500654ea29b7623c7594ab2cc0e05ffe3f10abc46c9c5dac824673c307dcbff5bc5f3774141ff99f6a34ec4dd8a58d154a1c72636a2422b8fafdef399dec350d2b91947448582d52291f2261d264d29399ae3c92dc61769a49224af9e7c98d74190f93eb49a44db7587c1a2afb5e1a4bec5cdeb8ad2aac9728d5ae95600c52e9f063c11cdb32b7c1d8435ce76fcf1fa562bd38f14bf6c303c70fb373d951b8a691ab793f12c0f3336d6191378bccaed32923bba81868148f029e3d5712a2fb9f610997549710716db37f7400690c8dfbed12ff0a683d8e4d0079b380e2fd856eeafb8c6eedfac8fb54dacd6bd8a96e9f8d23ea87252c1a7c2b53efc6e6aa1f0cc30fbaaf68ee7d46666afc15856669cd9baebf9397ff9f322cce5285e68a985f3b6aadce5e8f14e9f9dd16764bc4e9f62168aa265d8634ab706ed40b0809023f141c36717bd6ccef9ec6aa6b
fd2d00bda9375c2fee9ebba49590a166*1b0997cf64bb2c2ff88cb87bcacd9729d404bd46db18117c20d94e67c946fedc"; @@ -47,6 +46,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, typedef struct electrum { + secp256k1_t coords; + u32 data_buf[4096]; u32 data_len; @@ -62,102 +63,8 @@ typedef struct electrum_tmp } electrum_tmp_t; -typedef struct -{ - u32 ukey[8]; - - u32 pubkey[9]; // 32 + 1 bytes (for sign of the curve point) - - u32 hook_success; - -} electrum_hook_t; - -typedef struct electrum_hook_salt -{ - u8 ephemeral_pubkey_raw[33]; - - secp256k1_pubkey ephemeral_pubkey_struct; - -} electrum_hook_salt_t; - static const char *SIGNATURE_ELECTRUM = "$electrum$4*"; -#define M21700_MAX_ACCEL 16 -#define M21700_MAX_THREADS 64 - -u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : M21700_MAX_ACCEL; - - return kernel_accel_max; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : M21700_MAX_THREADS; - - return kernel_threads_max; -} - -void module_hook23 (hc_device_param_t *device_param, const void *hook_salts_buf, const u32 salt_pos, const u64 pw_pos) -{ - electrum_hook_t *hook_items = (electrum_hook_t *) device_param->hooks_buf; - - electrum_hook_salt_t *electrums = (electrum_hook_salt_t *) hook_salts_buf; - electrum_hook_salt_t *electrum = &electrums[salt_pos]; - - // we need to copy it because the secp256k1_ec_pubkey_tweak_mul () function has side effects - - secp256k1_pubkey ephemeral_pubkey = electrum->ephemeral_pubkey_struct; // shallow copy is safe ! 
- - // this hook data needs to be updated (the "hook_success" variable): - - electrum_hook_t *hook_item = &hook_items[pw_pos]; - - hook_item->hook_success = 0; - - u32 *hook_pubkey = hook_item->pubkey; - - hook_pubkey[0] = hook_item->ukey[0]; - hook_pubkey[1] = hook_item->ukey[1]; - hook_pubkey[2] = hook_item->ukey[2]; - hook_pubkey[3] = hook_item->ukey[3]; - hook_pubkey[4] = hook_item->ukey[4]; - hook_pubkey[5] = hook_item->ukey[5]; - hook_pubkey[6] = hook_item->ukey[6]; - hook_pubkey[7] = hook_item->ukey[7]; - hook_pubkey[8] = 0; - - /* - * Start with Elliptic Curve Cryptography (ECC) - */ - - const size_t length = 33; // NOT a bug (32 + 1 for the sign) - - bool multiply_success = hc_secp256k1_pubkey_tweak_mul (&ephemeral_pubkey, (u8 *) hook_pubkey, length); - - if (multiply_success == false) return; - - // in this case hook_success set to 1 doesn't mean that we've cracked it, but just that there were - // no problems detected by secp256k1_ec_pubkey_tweak_mul () - - hook_item->hook_success = 1; -} - -u64 module_hook_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 hook_size = (const u64) sizeof (electrum_hook_t); - - return hook_size; -} - -u64 module_hook_salt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 hook_salt_size = (const u64) sizeof (electrum_hook_salt_t); - - return hook_salt_size; -} - u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (electrum_t); @@ -194,8 +101,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE electrum_t *esalt = (electrum_t *) esalt_buf; - electrum_hook_salt_t *hook = 
(electrum_hook_salt_t *) hook_salt_buf; - token_t token; token.token_cnt = 4; @@ -245,16 +150,20 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // ephemeral pubkey: + u32 ephemeral_pubkey[9] = { 0 }; + + u8 *ephemeral_pubkey_ptr = (u8 *) ephemeral_pubkey; + for (u32 i = 0, j = 0; j < 66; i += 1, j += 2) { - hook->ephemeral_pubkey_raw[i] = hex_to_u8 (ephemeral_pos + j); + ephemeral_pubkey_ptr[i] = hex_to_u8 (ephemeral_pos + j); } - size_t length = 33; + secp256k1_t *coords = &esalt->coords; - bool parse_success = hc_secp256k1_pubkey_parse (&hook->ephemeral_pubkey_struct, hook->ephemeral_pubkey_raw, length); + u32 parse_success = parse_public (coords, ephemeral_pubkey); - if (parse_success == false) return (PARSER_SALT_VALUE); + if (parse_success != 0) return (PARSER_SALT_VALUE); // data buf: @@ -296,17 +205,19 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE electrum_t *esalt = (electrum_t *) esalt_buf; - electrum_hook_salt_t *hook = (electrum_hook_salt_t *) hook_salt_buf; - // ephemeral pubkey: char ephemeral[66 + 1]; memset (ephemeral, 0, sizeof (ephemeral)); - for (u32 i = 0, j = 0; i < 33; i += 1, j += 2) + u8 type = 0x02 | (esalt->coords.xy[8] & 1); // odd or even y coordinate + + snprintf (ephemeral, 66 + 1, "%02x", type); + + for (int i = 31, j = 2; i >= 0; i -= 1, j += 2) { - const u8 *ptr = (const u8 *) hook->ephemeral_pubkey_raw; + const u8 *ptr = (const u8 *) esalt->coords.xy; snprintf (ephemeral + j, 66 + 1 - j, "%02x", ptr[i]); } @@ -383,16 +294,16 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hashes_count_max = MODULE_DEFAULT; module_ctx->module_hlfmt_disable = MODULE_DEFAULT; module_ctx->module_hook12 = MODULE_DEFAULT; - module_ctx->module_hook23 = module_hook23; - module_ctx->module_hook_salt_size = module_hook_salt_size; - module_ctx->module_hook_size = module_hook_size; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = 
MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = module_kernel_accel_max; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c index 30d3a4d1d..12ffbd834 100644 --- a/src/modules/module_21800.c +++ b/src/modules/module_21800.c @@ -10,11 +10,7 @@ #include "convert.h" #include "shared.h" #include "memory.h" -#include "emu_inc_hash_sha512.h" -#include "emu_inc_hash_sha256.h" -#include "emu_inc_cipher_aes.h" -#include "ext_secp256k1.h" -#include "zlib.h" +#include "emu_inc_ecc_secp256k1.h" static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; static const u32 DGST_POS0 = 0; @@ -28,8 +24,7 @@ static const u64 KERN_TYPE = 21800; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE - | OPTS_TYPE_HOOK23; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = 
"$electrum$5*02170fee7c35f1ef3b229edc90fbd0793b688a0d6f41137a97aab2343d315cce16*94cf72d8f5d774932b414a3344984859e43721268d2eb35fa531de5a2fc7024b463c730a54f4f46229dd9fede5034b19ac415c2916e9c16b02094f845795df0c397ff76d597886b1f9e014ad1a8f64a3f617d9900aa645b3ba86f16ce542251fc22c41d93fa6bc118be96d9582917e19d2a299743331804cfc7ce2c035367b4cbcfb70adfb1e10a0f2795769f2165d8fd13daa8b45eeac495b5b63e91a87f63b42e483f84a881e49adecacf6519cb564694b42dd9fe80fcbc6cdb63cf5ae33f35255266f5c2524dd93d3cc15eba0f2ccdc3c109cc2d7e8f711b8b440f168caf8b005e8bcdfe694148e94a04d2a738f09349a96600bd8e8edae793b26ebae231022f24e96cb158db141ac40400a9e9ef099e673cfe017281537c57f82fb45c62bdb64462235a6eefb594961d5eb2c46537958e4d04250804c6e9f343ab7a0db07af6b8a9d1a6c5cfcd311b8fb8383ac9ed9d98d427d526c2f517fc97473bd87cb59899bd0e8fb8c57fa0f7e0d53daa57c972cf92764af4b1725a5fb8f504b663ec519731929b3caaa793d8ee74293eee27d0e208a60e26290bc546e6fa9ed865076e13febfea249729218c1b5752e912055fbf993fbac5df2cca2b37c5e0f9c30789858ceeb3c482a8db123966775aeed2eee2fc34efb160d164929f51589bff748ca773f38978bff3508d5a7591fb2d2795df983504a788071f469d78c88fd7899cabbc5804f458653d0206b82771a59522e1fa794d7de1536c51a437f5d6df5efd6654678e5794ca429b5752e1103340ed80786f1e9da7f5b39af628b2212e4d88cd36b8a7136d50a6b6e275ab406ba7c57cc70d77d01c4c16e9363901164fa92dc9e9b99219d5376f24862e775968605001e71b000e2c7123b4b43f3ca40db17efd729388782e46e64d43ccb947db4eb1473ff1a3836b74fe312cd1a33b73b8b8d80c087088932277773c329f2f66a01d6b3fc1e651c56959ebbed7b14a21b977f3acdedf1a0d98d519a74b50c39b3052d840106da4145345d86ec0461cddafacc2a4f0dd646457ad05bf04dcbcc80516a5c5ed14d2d639a70e77b686f19cbfb63f546d81ae19cc8ba35cce3f3b5b9602df25b678e14411fecec87b8347f5047513df415c6b1a3d39871a6bcb0f67d9cf8311596deae45fd1d84a04fd58f1fd55c5156b7309af09094c99a53674809cb87a45f95a2d69f9997a38085519cb4e056f9efd56672a2c1fe927d5ea8eec25b8aff6e56f9a2310f1a481daf407b8adf16201da267c59973920fd21bb087b88123ef98709839d6a3ee34efb8ccd5c15ed0e46cff3172682769531164b66c8689c35a26299dd26d09233d1f64f96674
74141cf9c6a6de7f2bc52c3bb44cfe679ff4b912c06df406283836b3581773cb76d375304f46239da5996594a8d03b14c02f1b35a432dc44a96331242ae31174*33a7ee59d6d17ed1ee99dc0a71771227e6f3734b17ba36eb589bdced56244135"; @@ -49,6 +44,14 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +typedef struct electrum +{ + secp256k1_t coords; + + u32 data_buf[256]; + +} electrum_t; + typedef struct electrum_tmp { u64 ipad[8]; @@ -59,250 +62,13 @@ typedef struct electrum_tmp } electrum_tmp_t; -typedef struct -{ - u32 ukey[8]; - - u32 hook_success; - -} electrum_hook_t; - -typedef struct electrum_hook_salt -{ - u32 data_buf[256]; - - u8 ephemeral_pubkey_raw[33]; - - secp256k1_pubkey ephemeral_pubkey_struct; - -} electrum_hook_salt_t; - static const char *SIGNATURE_ELECTRUM = "$electrum$5*"; -#define M21800_MAX_ACCEL 16 -#define M21800_MAX_THREADS 64 - -u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : M21800_MAX_ACCEL; - - return kernel_accel_max; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : M21800_MAX_THREADS; - - return kernel_threads_max; -} - -void module_hook23 (hc_device_param_t *device_param, const void *hook_salts_buf, const u32 salt_pos, const u64 pw_pos) -{ - electrum_hook_t *hook_items = (electrum_hook_t *) device_param->hooks_buf; - - electrum_hook_salt_t *electrums = (electrum_hook_salt_t *) hook_salts_buf; - electrum_hook_salt_t *electrum = &electrums[salt_pos]; - - u32 *data_buf = electrum->data_buf; - - // we need to copy it because the secp256k1_ec_pubkey_tweak_mul () function has side effects - - secp256k1_pubkey ephemeral_pubkey = electrum->ephemeral_pubkey_struct; // shallow copy is safe ! - - // this hook data needs to be updated (the "hook_success" variable): - - electrum_hook_t *hook_item = &hook_items[pw_pos]; - - hook_item->hook_success = 0; - - u32 ukey[9]; // (32 + 1) + 3 = 9 * 4 = 36 bytes (+1 for holding the "sign" of the curve point) - - ukey[0] = hook_item->ukey[0]; - ukey[1] = hook_item->ukey[1]; - ukey[2] = hook_item->ukey[2]; - ukey[3] = hook_item->ukey[3]; - ukey[4] = hook_item->ukey[4]; - ukey[5] = hook_item->ukey[5]; - ukey[6] = hook_item->ukey[6]; - ukey[7] = hook_item->ukey[7]; - ukey[8] = 0; - - /* - * Start with Elliptic Curve Cryptography (ECC) - */ - - u8 *tmp_buf = (u8 *) ukey; - - const size_t length = 33; // NOT a bug (32 + 1 for the sign) - - bool multiply_success = hc_secp256k1_pubkey_tweak_mul (&ephemeral_pubkey, tmp_buf, length); - - if (multiply_success == false) return; - - u32 input[64] = { 0 }; - - memcpy (input, tmp_buf, length); - - sha512_ctx_t sha512_ctx; - - sha512_init (&sha512_ctx); - sha512_update_swap (&sha512_ctx, input, length); - sha512_final (&sha512_ctx); - - // ... 
now we have the result in sha512_ctx.h[0]...sha512_ctx.h[7] - - u32 iv[4]; - - iv[0] = v32b_from_v64 (sha512_ctx.h[0]); - iv[1] = v32a_from_v64 (sha512_ctx.h[0]); - iv[2] = v32b_from_v64 (sha512_ctx.h[1]); - iv[3] = v32a_from_v64 (sha512_ctx.h[1]); - - iv[0] = byte_swap_32 (iv[0]); - iv[1] = byte_swap_32 (iv[1]); - iv[2] = byte_swap_32 (iv[2]); - iv[3] = byte_swap_32 (iv[3]); - - u32 key[4]; - - key[0] = v32b_from_v64 (sha512_ctx.h[2]); - key[1] = v32a_from_v64 (sha512_ctx.h[2]); - key[2] = v32b_from_v64 (sha512_ctx.h[3]); - key[3] = v32a_from_v64 (sha512_ctx.h[3]); - - key[0] = byte_swap_32 (key[0]); - key[1] = byte_swap_32 (key[1]); - key[2] = byte_swap_32 (key[2]); - key[3] = byte_swap_32 (key[3]); - - // init AES - - AES_KEY aes_key; - - memset (&aes_key, 0, sizeof (aes_key)); - - aes128_set_decrypt_key (aes_key.rdk, key, (u32 *) te0, (u32 *) te1, (u32 *) te2, (u32 *) te3, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3); - - int aes_len = 1024; // in my tests (very few) it also worked with only 128 input bytes ! 
- // int aes_len = 128; - - u32 data[4]; - u32 out[4]; - - u32 out_full[256]; // 1024 / 4 - - // we need to run it at least once: - - data[0] = data_buf[0]; - data[1] = data_buf[1]; - data[2] = data_buf[2]; - data[3] = data_buf[3]; - - aes128_decrypt (aes_key.rdk, data, out, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3, (u32 *) td4); - - out[0] ^= iv[0]; - - // early reject - - if ((out[0] & 0x0007ffff) != 0x00059c78) return; - - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - out_full[0] = out[0]; - out_full[1] = out[1]; - out_full[2] = out[2]; - out_full[3] = out[3]; - - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - - // for aes_len > 16 we need to loop - - for (int i = 16, j = 4; i < aes_len; i += 16, j += 4) - { - data[0] = data_buf[j + 0]; - data[1] = data_buf[j + 1]; - data[2] = data_buf[j + 2]; - data[3] = data_buf[j + 3]; - - aes128_decrypt (aes_key.rdk, data, out, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3, (u32 *) td4); - - out[0] ^= iv[0]; - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - - out_full[j + 0] = out[0]; - out_full[j + 1] = out[1]; - out_full[j + 2] = out[2]; - out_full[j + 3] = out[3]; - } - - // decompress with zlib: - - size_t compressed_data_len = aes_len; - u8 *compressed_data = (u8 *) out_full; - - size_t decompressed_data_len = 16; // we do NOT need more than the first bytes for validation - u8 *decompressed_data = (unsigned char *) hcmalloc (decompressed_data_len); - - z_stream inf; - - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - - inf.next_in = compressed_data; - inf.avail_in = compressed_data_len; - - inf.next_out = decompressed_data; - inf.avail_out = decompressed_data_len; - - // inflate: - - inflateInit2 (&inf, MAX_WBITS); - - int zlib_ret = inflate (&inf, Z_NO_FLUSH); - - inflateEnd (&inf); - - if ((zlib_ret != Z_OK) && (zlib_ret != Z_STREAM_END)) - { - hcfree 
(decompressed_data); - - return; - } - - if ((memcmp (decompressed_data, "{\n \"", 7) == 0) || - (memcmp (decompressed_data, "{\r\n \"", 8) == 0)) - { - hook_item->hook_success = 1; - } - - hcfree (decompressed_data); -} - -u64 module_hook_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 hook_size = (const u64) sizeof (electrum_hook_t); - - return hook_size; -} - -u64 module_hook_salt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u64 hook_salt_size = (const u64) sizeof (electrum_hook_salt_t); + const u64 esalt_size = (const u64) sizeof (electrum_t); - return hook_salt_size; + return esalt_size; } u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -332,7 +98,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE { u32 *digest = (u32 *) digest_buf; - electrum_hook_salt_t *electrum = (electrum_hook_salt_t *) hook_salt_buf; + electrum_t *esalt = (electrum_t *) esalt_buf; token_t token; @@ -377,20 +143,24 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // ephemeral pubkey: + u32 ephemeral_pubkey[9] = { 0 }; + + u8 *ephemeral_pubkey_ptr = (u8 *) ephemeral_pubkey; + for (u32 i = 0, j = 0; j < 66; i += 1, j += 2) { - electrum->ephemeral_pubkey_raw[i] = hex_to_u8 (ephemeral_pos + j); + ephemeral_pubkey_ptr[i] = hex_to_u8 (ephemeral_pos + j); } - size_t length = 33; + secp256k1_t *coords = &esalt->coords; - bool parse_success = hc_secp256k1_pubkey_parse 
(&electrum->ephemeral_pubkey_struct, electrum->ephemeral_pubkey_raw, length); + u32 parse_success = parse_public (coords, ephemeral_pubkey); - if (parse_success == false) return (PARSER_SALT_VALUE); + if (parse_success != 0) return (PARSER_SALT_VALUE); // data buf: - u8* data_buf_ptr = (u8 *) electrum->data_buf; + u8* data_buf_ptr = (u8 *) esalt->data_buf; for (u32 i = 0, j = 0; j < 2048; i += 1, j += 2) { @@ -408,10 +178,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // fake salt - salt->salt_buf[0] = electrum->data_buf[0]; - salt->salt_buf[1] = electrum->data_buf[1]; - salt->salt_buf[2] = electrum->data_buf[2]; - salt->salt_buf[3] = electrum->data_buf[3]; + salt->salt_buf[0] = esalt->data_buf[0]; + salt->salt_buf[1] = esalt->data_buf[1]; + salt->salt_buf[2] = esalt->data_buf[2]; + salt->salt_buf[3] = esalt->data_buf[3]; salt->salt_len = 16; @@ -424,7 +194,7 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE { u32 *digest = (u32 *) digest_buf; - electrum_hook_salt_t *electrum = (electrum_hook_salt_t *) hook_salt_buf; + electrum_t *esalt = (electrum_t *) esalt_buf; // ephemeral pubkey: @@ -432,9 +202,13 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE memset (ephemeral, 0, sizeof (ephemeral)); - for (u32 i = 0, j = 0; i < 33; i += 1, j += 2) + u8 type = 0x02 | (esalt->coords.xy[8] & 1); // odd or even y coordinate + + snprintf (ephemeral, 66 + 1, "%02x", type); + + for (int i = 31, j = 2; i >= 0; i -= 1, j += 2) { - const u8 *ptr = (const u8 *) electrum->ephemeral_pubkey_raw; + const u8 *ptr = (const u8 *) esalt->coords.xy; snprintf (ephemeral + j, 66 + 1 - j, "%02x", ptr[i]); } @@ -447,7 +221,7 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE for (u32 i = 0, j = 0; i < 1024; i += 1, j += 2) { - const u8 *ptr = (const u8 *) electrum->data_buf; + const u8 *ptr = (const u8 *) esalt->data_buf; snprintf (data_buf + j, 2048 + 1 - 
j, "%02x", ptr[i]); } @@ -490,7 +264,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_dgst_pos3 = module_dgst_pos3; module_ctx->module_dgst_size = module_dgst_size; module_ctx->module_dictstat_disable = MODULE_DEFAULT; - module_ctx->module_esalt_size = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; module_ctx->module_extra_buffer_size = MODULE_DEFAULT; module_ctx->module_extra_tmp_size = MODULE_DEFAULT; module_ctx->module_forced_outfile_format = MODULE_DEFAULT; @@ -511,16 +285,16 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hashes_count_max = MODULE_DEFAULT; module_ctx->module_hlfmt_disable = MODULE_DEFAULT; module_ctx->module_hook12 = MODULE_DEFAULT; - module_ctx->module_hook23 = module_hook23; - module_ctx->module_hook_salt_size = module_hook_salt_size; - module_ctx->module_hook_size = module_hook_size; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = module_kernel_accel_max; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;