diff --git a/OpenCL/inc_bignum_operations.cl b/OpenCL/inc_bignum_operations.cl new file mode 100644 index 000000000..b471693f4 --- /dev/null +++ b/OpenCL/inc_bignum_operations.cl @@ -0,0 +1,2402 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "inc_bignum_operations.h" + +// n[128], input and result, n = n % m +// m[128], modulo, we increased it from m[64] for speed (memory+comparison) purposes +// similar to mod_512 () for 512bit modulo from OpenCL/inc_ecc_secp256k1.cl: + +DECLSPEC void mod_4096 (PRIVATE_AS u32 *n, PRIVATE_AS const u32 *m) +{ + // we need to perform a modulo operation with 4096-bit % 2048-bit (bignum modulo): + // 512 bytes % 256 bytes (or 128 u32 % 64 u32) + // note: we increased the size of m to avoid further temp variables/copies of m + // but in general the modulo is just 256 bytes + + // ATTENTION: least significant byte are at n[0] and m[0] + + /* + the general modulo by shift and substract code (n = n % m): + + x = m; + + t = n >> 1; + + while (x <= t) x <<= 1; + + while (n >= m) + { + if (n >= x) n -= x; + + x >>= 1; + } + + return n; // remainder + */ + + + /* + * Start: + */ + + // x = m; + + u32 x[128]; + + // we use a shift trick similar to the mod_512 () shortcut from + // OpenCL/inc_ecc_secp256k1.cl + + // for (u32 i = 0; i < 64; i++) x[i] = 0; + // for (u32 i = 64; i < 128; i++) x[i] = m[i - 64]; + + x[ 0] = 0; x[ 1] = 0; x[ 2] = 0; x[ 3] = 0; + x[ 4] = 0; x[ 5] = 0; x[ 6] = 0; x[ 7] = 0; + x[ 8] = 0; x[ 9] = 0; x[ 10] = 0; x[ 11] = 0; + x[ 12] = 0; x[ 13] = 0; x[ 14] = 0; x[ 15] = 0; + x[ 16] = 0; x[ 17] = 0; x[ 18] = 0; x[ 19] = 0; + x[ 20] = 0; x[ 21] = 0; x[ 22] = 0; x[ 23] = 0; + x[ 24] = 0; x[ 25] = 0; x[ 26] = 0; x[ 27] = 0; + x[ 28] = 0; x[ 29] = 0; x[ 30] = 0; x[ 31] = 0; + x[ 32] = 0; x[ 33] = 0; x[ 34] = 0; x[ 35] = 0; + x[ 36] = 0; x[ 37] = 0; x[ 38] = 0; x[ 39] = 0; + x[ 40] = 0; x[ 41] = 0; x[ 42] = 0; x[ 43] = 0; + x[ 44] = 0; x[ 45] = 0; x[ 46] = 0; x[ 47] = 0; + x[ 48] = 0; x[ 
49] = 0; x[ 50] = 0; x[ 51] = 0; + x[ 52] = 0; x[ 53] = 0; x[ 54] = 0; x[ 55] = 0; + x[ 56] = 0; x[ 57] = 0; x[ 58] = 0; x[ 59] = 0; + x[ 60] = 0; x[ 61] = 0; x[ 62] = 0; x[ 63] = 0; + x[ 64] = m[ 0]; x[ 65] = m[ 1]; x[ 66] = m[ 2]; x[ 67] = m[ 3]; + x[ 68] = m[ 4]; x[ 69] = m[ 5]; x[ 70] = m[ 6]; x[ 71] = m[ 7]; + x[ 72] = m[ 8]; x[ 73] = m[ 9]; x[ 74] = m[ 10]; x[ 75] = m[ 11]; + x[ 76] = m[ 12]; x[ 77] = m[ 13]; x[ 78] = m[ 14]; x[ 79] = m[ 15]; + x[ 80] = m[ 16]; x[ 81] = m[ 17]; x[ 82] = m[ 18]; x[ 83] = m[ 19]; + x[ 84] = m[ 20]; x[ 85] = m[ 21]; x[ 86] = m[ 22]; x[ 87] = m[ 23]; + x[ 88] = m[ 24]; x[ 89] = m[ 25]; x[ 90] = m[ 26]; x[ 91] = m[ 27]; + x[ 92] = m[ 28]; x[ 93] = m[ 29]; x[ 94] = m[ 30]; x[ 95] = m[ 31]; + x[ 96] = m[ 32]; x[ 97] = m[ 33]; x[ 98] = m[ 34]; x[ 99] = m[ 35]; + x[100] = m[ 36]; x[101] = m[ 37]; x[102] = m[ 38]; x[103] = m[ 39]; + x[104] = m[ 40]; x[105] = m[ 41]; x[106] = m[ 42]; x[107] = m[ 43]; + x[108] = m[ 44]; x[109] = m[ 45]; x[110] = m[ 46]; x[111] = m[ 47]; + x[112] = m[ 48]; x[113] = m[ 49]; x[114] = m[ 50]; x[115] = m[ 51]; + x[116] = m[ 52]; x[117] = m[ 53]; x[118] = m[ 54]; x[119] = m[ 55]; + x[120] = m[ 56]; x[121] = m[ 57]; x[122] = m[ 58]; x[123] = m[ 59]; + x[124] = m[ 60]; x[125] = m[ 61]; x[126] = m[ 62]; x[127] = m[ 63]; + + // n >= b + + while (n[127] >= m[127]) + { + u32 shift_needed = 1; + + for (int i = 127; i >= 0; i--) + { + if (n[i] < m[i]) // FAIL case + { + shift_needed = 0; + + break; + } + else + if (n[i] > m[i]) // definitely a SUCCESS case (otherwise continue to check if equal) + { + break; + } + } + + if (shift_needed == 0) break; + + // r = x (copy it to have the original values for the subtraction) + + u32 r[128]; + + // for (u32 i = 0; i < 128; i++) r[i] = x[i]; + + r[ 0] = x[ 0]; r[ 1] = x[ 1]; r[ 2] = x[ 2]; r[ 3] = x[ 3]; + r[ 4] = x[ 4]; r[ 5] = x[ 5]; r[ 6] = x[ 6]; r[ 7] = x[ 7]; + r[ 8] = x[ 8]; r[ 9] = x[ 9]; r[ 10] = x[ 10]; r[ 11] = x[ 11]; + r[ 12] = x[ 12]; r[ 13] = x[ 13]; r[ 14] = x[ 
14]; r[ 15] = x[ 15]; + r[ 16] = x[ 16]; r[ 17] = x[ 17]; r[ 18] = x[ 18]; r[ 19] = x[ 19]; + r[ 20] = x[ 20]; r[ 21] = x[ 21]; r[ 22] = x[ 22]; r[ 23] = x[ 23]; + r[ 24] = x[ 24]; r[ 25] = x[ 25]; r[ 26] = x[ 26]; r[ 27] = x[ 27]; + r[ 28] = x[ 28]; r[ 29] = x[ 29]; r[ 30] = x[ 30]; r[ 31] = x[ 31]; + r[ 32] = x[ 32]; r[ 33] = x[ 33]; r[ 34] = x[ 34]; r[ 35] = x[ 35]; + r[ 36] = x[ 36]; r[ 37] = x[ 37]; r[ 38] = x[ 38]; r[ 39] = x[ 39]; + r[ 40] = x[ 40]; r[ 41] = x[ 41]; r[ 42] = x[ 42]; r[ 43] = x[ 43]; + r[ 44] = x[ 44]; r[ 45] = x[ 45]; r[ 46] = x[ 46]; r[ 47] = x[ 47]; + r[ 48] = x[ 48]; r[ 49] = x[ 49]; r[ 50] = x[ 50]; r[ 51] = x[ 51]; + r[ 52] = x[ 52]; r[ 53] = x[ 53]; r[ 54] = x[ 54]; r[ 55] = x[ 55]; + r[ 56] = x[ 56]; r[ 57] = x[ 57]; r[ 58] = x[ 58]; r[ 59] = x[ 59]; + r[ 60] = x[ 60]; r[ 61] = x[ 61]; r[ 62] = x[ 62]; r[ 63] = x[ 63]; + r[ 64] = x[ 64]; r[ 65] = x[ 65]; r[ 66] = x[ 66]; r[ 67] = x[ 67]; + r[ 68] = x[ 68]; r[ 69] = x[ 69]; r[ 70] = x[ 70]; r[ 71] = x[ 71]; + r[ 72] = x[ 72]; r[ 73] = x[ 73]; r[ 74] = x[ 74]; r[ 75] = x[ 75]; + r[ 76] = x[ 76]; r[ 77] = x[ 77]; r[ 78] = x[ 78]; r[ 79] = x[ 79]; + r[ 80] = x[ 80]; r[ 81] = x[ 81]; r[ 82] = x[ 82]; r[ 83] = x[ 83]; + r[ 84] = x[ 84]; r[ 85] = x[ 85]; r[ 86] = x[ 86]; r[ 87] = x[ 87]; + r[ 88] = x[ 88]; r[ 89] = x[ 89]; r[ 90] = x[ 90]; r[ 91] = x[ 91]; + r[ 92] = x[ 92]; r[ 93] = x[ 93]; r[ 94] = x[ 94]; r[ 95] = x[ 95]; + r[ 96] = x[ 96]; r[ 97] = x[ 97]; r[ 98] = x[ 98]; r[ 99] = x[ 99]; + r[100] = x[100]; r[101] = x[101]; r[102] = x[102]; r[103] = x[103]; + r[104] = x[104]; r[105] = x[105]; r[106] = x[106]; r[107] = x[107]; + r[108] = x[108]; r[109] = x[109]; r[110] = x[110]; r[111] = x[111]; + r[112] = x[112]; r[113] = x[113]; r[114] = x[114]; r[115] = x[115]; + r[116] = x[116]; r[117] = x[117]; r[118] = x[118]; r[119] = x[119]; + r[120] = x[120]; r[121] = x[121]; r[122] = x[122]; r[123] = x[123]; + r[124] = x[124]; r[125] = x[125]; r[126] = x[126]; r[127] = x[127]; + + + // x >>= 1 
(half it): + + // for (u32 i = 0; i < 127; i++) x[i] = (x[i + 1] << 31) | (x[i] >> 1); + // x[127] >>= 1; + + x[ 0] = (x[ 1] << 31) | (x[ 0] >> 1); + x[ 1] = (x[ 2] << 31) | (x[ 1] >> 1); + x[ 2] = (x[ 3] << 31) | (x[ 2] >> 1); + x[ 3] = (x[ 4] << 31) | (x[ 3] >> 1); + x[ 4] = (x[ 5] << 31) | (x[ 4] >> 1); + x[ 5] = (x[ 6] << 31) | (x[ 5] >> 1); + x[ 6] = (x[ 7] << 31) | (x[ 6] >> 1); + x[ 7] = (x[ 8] << 31) | (x[ 7] >> 1); + x[ 8] = (x[ 9] << 31) | (x[ 8] >> 1); + x[ 9] = (x[ 10] << 31) | (x[ 9] >> 1); + x[ 10] = (x[ 11] << 31) | (x[ 10] >> 1); + x[ 11] = (x[ 12] << 31) | (x[ 11] >> 1); + x[ 12] = (x[ 13] << 31) | (x[ 12] >> 1); + x[ 13] = (x[ 14] << 31) | (x[ 13] >> 1); + x[ 14] = (x[ 15] << 31) | (x[ 14] >> 1); + x[ 15] = (x[ 16] << 31) | (x[ 15] >> 1); + x[ 16] = (x[ 17] << 31) | (x[ 16] >> 1); + x[ 17] = (x[ 18] << 31) | (x[ 17] >> 1); + x[ 18] = (x[ 19] << 31) | (x[ 18] >> 1); + x[ 19] = (x[ 20] << 31) | (x[ 19] >> 1); + x[ 20] = (x[ 21] << 31) | (x[ 20] >> 1); + x[ 21] = (x[ 22] << 31) | (x[ 21] >> 1); + x[ 22] = (x[ 23] << 31) | (x[ 22] >> 1); + x[ 23] = (x[ 24] << 31) | (x[ 23] >> 1); + x[ 24] = (x[ 25] << 31) | (x[ 24] >> 1); + x[ 25] = (x[ 26] << 31) | (x[ 25] >> 1); + x[ 26] = (x[ 27] << 31) | (x[ 26] >> 1); + x[ 27] = (x[ 28] << 31) | (x[ 27] >> 1); + x[ 28] = (x[ 29] << 31) | (x[ 28] >> 1); + x[ 29] = (x[ 30] << 31) | (x[ 29] >> 1); + x[ 30] = (x[ 31] << 31) | (x[ 30] >> 1); + x[ 31] = (x[ 32] << 31) | (x[ 31] >> 1); + x[ 32] = (x[ 33] << 31) | (x[ 32] >> 1); + x[ 33] = (x[ 34] << 31) | (x[ 33] >> 1); + x[ 34] = (x[ 35] << 31) | (x[ 34] >> 1); + x[ 35] = (x[ 36] << 31) | (x[ 35] >> 1); + x[ 36] = (x[ 37] << 31) | (x[ 36] >> 1); + x[ 37] = (x[ 38] << 31) | (x[ 37] >> 1); + x[ 38] = (x[ 39] << 31) | (x[ 38] >> 1); + x[ 39] = (x[ 40] << 31) | (x[ 39] >> 1); + x[ 40] = (x[ 41] << 31) | (x[ 40] >> 1); + x[ 41] = (x[ 42] << 31) | (x[ 41] >> 1); + x[ 42] = (x[ 43] << 31) | (x[ 42] >> 1); + x[ 43] = (x[ 44] << 31) | (x[ 43] >> 1); + x[ 44] = (x[ 45] << 31) | 
(x[ 44] >> 1); + x[ 45] = (x[ 46] << 31) | (x[ 45] >> 1); + x[ 46] = (x[ 47] << 31) | (x[ 46] >> 1); + x[ 47] = (x[ 48] << 31) | (x[ 47] >> 1); + x[ 48] = (x[ 49] << 31) | (x[ 48] >> 1); + x[ 49] = (x[ 50] << 31) | (x[ 49] >> 1); + x[ 50] = (x[ 51] << 31) | (x[ 50] >> 1); + x[ 51] = (x[ 52] << 31) | (x[ 51] >> 1); + x[ 52] = (x[ 53] << 31) | (x[ 52] >> 1); + x[ 53] = (x[ 54] << 31) | (x[ 53] >> 1); + x[ 54] = (x[ 55] << 31) | (x[ 54] >> 1); + x[ 55] = (x[ 56] << 31) | (x[ 55] >> 1); + x[ 56] = (x[ 57] << 31) | (x[ 56] >> 1); + x[ 57] = (x[ 58] << 31) | (x[ 57] >> 1); + x[ 58] = (x[ 59] << 31) | (x[ 58] >> 1); + x[ 59] = (x[ 60] << 31) | (x[ 59] >> 1); + x[ 60] = (x[ 61] << 31) | (x[ 60] >> 1); + x[ 61] = (x[ 62] << 31) | (x[ 61] >> 1); + x[ 62] = (x[ 63] << 31) | (x[ 62] >> 1); + x[ 63] = (x[ 64] << 31) | (x[ 63] >> 1); + x[ 64] = (x[ 65] << 31) | (x[ 64] >> 1); + x[ 65] = (x[ 66] << 31) | (x[ 65] >> 1); + x[ 66] = (x[ 67] << 31) | (x[ 66] >> 1); + x[ 67] = (x[ 68] << 31) | (x[ 67] >> 1); + x[ 68] = (x[ 69] << 31) | (x[ 68] >> 1); + x[ 69] = (x[ 70] << 31) | (x[ 69] >> 1); + x[ 70] = (x[ 71] << 31) | (x[ 70] >> 1); + x[ 71] = (x[ 72] << 31) | (x[ 71] >> 1); + x[ 72] = (x[ 73] << 31) | (x[ 72] >> 1); + x[ 73] = (x[ 74] << 31) | (x[ 73] >> 1); + x[ 74] = (x[ 75] << 31) | (x[ 74] >> 1); + x[ 75] = (x[ 76] << 31) | (x[ 75] >> 1); + x[ 76] = (x[ 77] << 31) | (x[ 76] >> 1); + x[ 77] = (x[ 78] << 31) | (x[ 77] >> 1); + x[ 78] = (x[ 79] << 31) | (x[ 78] >> 1); + x[ 79] = (x[ 80] << 31) | (x[ 79] >> 1); + x[ 80] = (x[ 81] << 31) | (x[ 80] >> 1); + x[ 81] = (x[ 82] << 31) | (x[ 81] >> 1); + x[ 82] = (x[ 83] << 31) | (x[ 82] >> 1); + x[ 83] = (x[ 84] << 31) | (x[ 83] >> 1); + x[ 84] = (x[ 85] << 31) | (x[ 84] >> 1); + x[ 85] = (x[ 86] << 31) | (x[ 85] >> 1); + x[ 86] = (x[ 87] << 31) | (x[ 86] >> 1); + x[ 87] = (x[ 88] << 31) | (x[ 87] >> 1); + x[ 88] = (x[ 89] << 31) | (x[ 88] >> 1); + x[ 89] = (x[ 90] << 31) | (x[ 89] >> 1); + x[ 90] = (x[ 91] << 31) | (x[ 90] >> 1); + x[ 
91] = (x[ 92] << 31) | (x[ 91] >> 1); + x[ 92] = (x[ 93] << 31) | (x[ 92] >> 1); + x[ 93] = (x[ 94] << 31) | (x[ 93] >> 1); + x[ 94] = (x[ 95] << 31) | (x[ 94] >> 1); + x[ 95] = (x[ 96] << 31) | (x[ 95] >> 1); + x[ 96] = (x[ 97] << 31) | (x[ 96] >> 1); + x[ 97] = (x[ 98] << 31) | (x[ 97] >> 1); + x[ 98] = (x[ 99] << 31) | (x[ 98] >> 1); + x[ 99] = (x[100] << 31) | (x[ 99] >> 1); + x[100] = (x[101] << 31) | (x[100] >> 1); + x[101] = (x[102] << 31) | (x[101] >> 1); + x[102] = (x[103] << 31) | (x[102] >> 1); + x[103] = (x[104] << 31) | (x[103] >> 1); + x[104] = (x[105] << 31) | (x[104] >> 1); + x[105] = (x[106] << 31) | (x[105] >> 1); + x[106] = (x[107] << 31) | (x[106] >> 1); + x[107] = (x[108] << 31) | (x[107] >> 1); + x[108] = (x[109] << 31) | (x[108] >> 1); + x[109] = (x[110] << 31) | (x[109] >> 1); + x[110] = (x[111] << 31) | (x[110] >> 1); + x[111] = (x[112] << 31) | (x[111] >> 1); + x[112] = (x[113] << 31) | (x[112] >> 1); + x[113] = (x[114] << 31) | (x[113] >> 1); + x[114] = (x[115] << 31) | (x[114] >> 1); + x[115] = (x[116] << 31) | (x[115] >> 1); + x[116] = (x[117] << 31) | (x[116] >> 1); + x[117] = (x[118] << 31) | (x[117] >> 1); + x[118] = (x[119] << 31) | (x[118] >> 1); + x[119] = (x[120] << 31) | (x[119] >> 1); + x[120] = (x[121] << 31) | (x[120] >> 1); + x[121] = (x[122] << 31) | (x[121] >> 1); + x[122] = (x[123] << 31) | (x[122] >> 1); + x[123] = (x[124] << 31) | (x[123] >> 1); + x[124] = (x[125] << 31) | (x[124] >> 1); + x[125] = (x[126] << 31) | (x[125] >> 1); + x[126] = (x[127] << 31) | (x[126] >> 1); + x[127] = (x[127] >> 1); + + + /* + * if (n >= r) n -= r; + */ + + // check if (n >= r): + + u32 sub_needed = 1; + + for (int i = 127; i >= 0; i--) + { + if (n[i] < r[i]) // FAIL case + { + sub_needed = 0; + + break; + } + else + if (n[i] > r[i]) // definitely a SUCCESS case (otherwise continue to check if equal) + { + break; + } + } + + if (sub_needed == 0) continue; + + // n -= r; + + // u32 res = 0; for (u32 i = 0; i < 128; i++) res |= r[i]; if 
(res == 0) break; + + if ((r[ 0] == 0) && (r[ 1] == 0) && (r[ 2] == 0) && (r[ 3] == 0) && + (r[ 4] == 0) && (r[ 5] == 0) && (r[ 6] == 0) && (r[ 7] == 0) && + (r[ 8] == 0) && (r[ 9] == 0) && (r[ 10] == 0) && (r[ 11] == 0) && + (r[ 12] == 0) && (r[ 13] == 0) && (r[ 14] == 0) && (r[ 15] == 0) && + (r[ 16] == 0) && (r[ 17] == 0) && (r[ 18] == 0) && (r[ 19] == 0) && + (r[ 20] == 0) && (r[ 21] == 0) && (r[ 22] == 0) && (r[ 23] == 0) && + (r[ 24] == 0) && (r[ 25] == 0) && (r[ 26] == 0) && (r[ 27] == 0) && + (r[ 28] == 0) && (r[ 29] == 0) && (r[ 30] == 0) && (r[ 31] == 0) && + (r[ 32] == 0) && (r[ 33] == 0) && (r[ 34] == 0) && (r[ 35] == 0) && + (r[ 36] == 0) && (r[ 37] == 0) && (r[ 38] == 0) && (r[ 39] == 0) && + (r[ 40] == 0) && (r[ 41] == 0) && (r[ 42] == 0) && (r[ 43] == 0) && + (r[ 44] == 0) && (r[ 45] == 0) && (r[ 46] == 0) && (r[ 47] == 0) && + (r[ 48] == 0) && (r[ 49] == 0) && (r[ 50] == 0) && (r[ 51] == 0) && + (r[ 52] == 0) && (r[ 53] == 0) && (r[ 54] == 0) && (r[ 55] == 0) && + (r[ 56] == 0) && (r[ 57] == 0) && (r[ 58] == 0) && (r[ 59] == 0) && + (r[ 60] == 0) && (r[ 61] == 0) && (r[ 62] == 0) && (r[ 63] == 0) && + (r[ 64] == 0) && (r[ 65] == 0) && (r[ 66] == 0) && (r[ 67] == 0) && + (r[ 68] == 0) && (r[ 69] == 0) && (r[ 70] == 0) && (r[ 71] == 0) && + (r[ 72] == 0) && (r[ 73] == 0) && (r[ 74] == 0) && (r[ 75] == 0) && + (r[ 76] == 0) && (r[ 77] == 0) && (r[ 78] == 0) && (r[ 79] == 0) && + (r[ 80] == 0) && (r[ 81] == 0) && (r[ 82] == 0) && (r[ 83] == 0) && + (r[ 84] == 0) && (r[ 85] == 0) && (r[ 86] == 0) && (r[ 87] == 0) && + (r[ 88] == 0) && (r[ 89] == 0) && (r[ 90] == 0) && (r[ 91] == 0) && + (r[ 92] == 0) && (r[ 93] == 0) && (r[ 94] == 0) && (r[ 95] == 0) && + (r[ 96] == 0) && (r[ 97] == 0) && (r[ 98] == 0) && (r[ 99] == 0) && + (r[100] == 0) && (r[101] == 0) && (r[102] == 0) && (r[103] == 0) && + (r[104] == 0) && (r[105] == 0) && (r[106] == 0) && (r[107] == 0) && + (r[108] == 0) && (r[109] == 0) && (r[110] == 0) && (r[111] == 0) && + (r[112] == 0) && 
(r[113] == 0) && (r[114] == 0) && (r[115] == 0) && + (r[116] == 0) && (r[117] == 0) && (r[118] == 0) && (r[119] == 0) && + (r[120] == 0) && (r[121] == 0) && (r[122] == 0) && (r[123] == 0) && + (r[124] == 0) && (r[125] == 0) && (r[126] == 0) && (r[127] == 0)) break; // error if r == 0 + + // for (u32 i = 0; i < 128; i++) r[i] = n[i] - r[i]; + + r[ 0] = n[ 0] - r[ 0]; + r[ 1] = n[ 1] - r[ 1]; + r[ 2] = n[ 2] - r[ 2]; + r[ 3] = n[ 3] - r[ 3]; + r[ 4] = n[ 4] - r[ 4]; + r[ 5] = n[ 5] - r[ 5]; + r[ 6] = n[ 6] - r[ 6]; + r[ 7] = n[ 7] - r[ 7]; + r[ 8] = n[ 8] - r[ 8]; + r[ 9] = n[ 9] - r[ 9]; + r[ 10] = n[ 10] - r[ 10]; + r[ 11] = n[ 11] - r[ 11]; + r[ 12] = n[ 12] - r[ 12]; + r[ 13] = n[ 13] - r[ 13]; + r[ 14] = n[ 14] - r[ 14]; + r[ 15] = n[ 15] - r[ 15]; + r[ 16] = n[ 16] - r[ 16]; + r[ 17] = n[ 17] - r[ 17]; + r[ 18] = n[ 18] - r[ 18]; + r[ 19] = n[ 19] - r[ 19]; + r[ 20] = n[ 20] - r[ 20]; + r[ 21] = n[ 21] - r[ 21]; + r[ 22] = n[ 22] - r[ 22]; + r[ 23] = n[ 23] - r[ 23]; + r[ 24] = n[ 24] - r[ 24]; + r[ 25] = n[ 25] - r[ 25]; + r[ 26] = n[ 26] - r[ 26]; + r[ 27] = n[ 27] - r[ 27]; + r[ 28] = n[ 28] - r[ 28]; + r[ 29] = n[ 29] - r[ 29]; + r[ 30] = n[ 30] - r[ 30]; + r[ 31] = n[ 31] - r[ 31]; + r[ 32] = n[ 32] - r[ 32]; + r[ 33] = n[ 33] - r[ 33]; + r[ 34] = n[ 34] - r[ 34]; + r[ 35] = n[ 35] - r[ 35]; + r[ 36] = n[ 36] - r[ 36]; + r[ 37] = n[ 37] - r[ 37]; + r[ 38] = n[ 38] - r[ 38]; + r[ 39] = n[ 39] - r[ 39]; + r[ 40] = n[ 40] - r[ 40]; + r[ 41] = n[ 41] - r[ 41]; + r[ 42] = n[ 42] - r[ 42]; + r[ 43] = n[ 43] - r[ 43]; + r[ 44] = n[ 44] - r[ 44]; + r[ 45] = n[ 45] - r[ 45]; + r[ 46] = n[ 46] - r[ 46]; + r[ 47] = n[ 47] - r[ 47]; + r[ 48] = n[ 48] - r[ 48]; + r[ 49] = n[ 49] - r[ 49]; + r[ 50] = n[ 50] - r[ 50]; + r[ 51] = n[ 51] - r[ 51]; + r[ 52] = n[ 52] - r[ 52]; + r[ 53] = n[ 53] - r[ 53]; + r[ 54] = n[ 54] - r[ 54]; + r[ 55] = n[ 55] - r[ 55]; + r[ 56] = n[ 56] - r[ 56]; + r[ 57] = n[ 57] - r[ 57]; + r[ 58] = n[ 58] - r[ 58]; + r[ 59] = n[ 59] - r[ 59]; + r[ 
60] = n[ 60] - r[ 60]; + r[ 61] = n[ 61] - r[ 61]; + r[ 62] = n[ 62] - r[ 62]; + r[ 63] = n[ 63] - r[ 63]; + r[ 64] = n[ 64] - r[ 64]; + r[ 65] = n[ 65] - r[ 65]; + r[ 66] = n[ 66] - r[ 66]; + r[ 67] = n[ 67] - r[ 67]; + r[ 68] = n[ 68] - r[ 68]; + r[ 69] = n[ 69] - r[ 69]; + r[ 70] = n[ 70] - r[ 70]; + r[ 71] = n[ 71] - r[ 71]; + r[ 72] = n[ 72] - r[ 72]; + r[ 73] = n[ 73] - r[ 73]; + r[ 74] = n[ 74] - r[ 74]; + r[ 75] = n[ 75] - r[ 75]; + r[ 76] = n[ 76] - r[ 76]; + r[ 77] = n[ 77] - r[ 77]; + r[ 78] = n[ 78] - r[ 78]; + r[ 79] = n[ 79] - r[ 79]; + r[ 80] = n[ 80] - r[ 80]; + r[ 81] = n[ 81] - r[ 81]; + r[ 82] = n[ 82] - r[ 82]; + r[ 83] = n[ 83] - r[ 83]; + r[ 84] = n[ 84] - r[ 84]; + r[ 85] = n[ 85] - r[ 85]; + r[ 86] = n[ 86] - r[ 86]; + r[ 87] = n[ 87] - r[ 87]; + r[ 88] = n[ 88] - r[ 88]; + r[ 89] = n[ 89] - r[ 89]; + r[ 90] = n[ 90] - r[ 90]; + r[ 91] = n[ 91] - r[ 91]; + r[ 92] = n[ 92] - r[ 92]; + r[ 93] = n[ 93] - r[ 93]; + r[ 94] = n[ 94] - r[ 94]; + r[ 95] = n[ 95] - r[ 95]; + r[ 96] = n[ 96] - r[ 96]; + r[ 97] = n[ 97] - r[ 97]; + r[ 98] = n[ 98] - r[ 98]; + r[ 99] = n[ 99] - r[ 99]; + r[100] = n[100] - r[100]; + r[101] = n[101] - r[101]; + r[102] = n[102] - r[102]; + r[103] = n[103] - r[103]; + r[104] = n[104] - r[104]; + r[105] = n[105] - r[105]; + r[106] = n[106] - r[106]; + r[107] = n[107] - r[107]; + r[108] = n[108] - r[108]; + r[109] = n[109] - r[109]; + r[110] = n[110] - r[110]; + r[111] = n[111] - r[111]; + r[112] = n[112] - r[112]; + r[113] = n[113] - r[113]; + r[114] = n[114] - r[114]; + r[115] = n[115] - r[115]; + r[116] = n[116] - r[116]; + r[117] = n[117] - r[117]; + r[118] = n[118] - r[118]; + r[119] = n[119] - r[119]; + r[120] = n[120] - r[120]; + r[121] = n[121] - r[121]; + r[122] = n[122] - r[122]; + r[123] = n[123] - r[123]; + r[124] = n[124] - r[124]; + r[125] = n[125] - r[125]; + r[126] = n[126] - r[126]; + r[127] = n[127] - r[127]; + + // take care of the "borrow": + + // for (u32 i = 0; i < 127; i++) if (r[i] > n[i]) r[i + 1]--; 
+ + // if (r[ 0] > n[ 0]) r[ 1]--; + // if (r[ 1] > n[ 1]) r[ 2]--; + // if (r[ 2] > n[ 2]) r[ 3]--; + // if (r[ 3] > n[ 3]) r[ 4]--; + // if (r[ 4] > n[ 4]) r[ 5]--; + // if (r[ 5] > n[ 5]) r[ 6]--; + // if (r[ 6] > n[ 6]) r[ 7]--; + // if (r[ 7] > n[ 7]) r[ 8]--; + // if (r[ 8] > n[ 8]) r[ 9]--; + // if (r[ 9] > n[ 9]) r[ 10]--; + // if (r[ 10] > n[ 10]) r[ 11]--; + // if (r[ 11] > n[ 11]) r[ 12]--; + // if (r[ 12] > n[ 12]) r[ 13]--; + // if (r[ 13] > n[ 13]) r[ 14]--; + // if (r[ 14] > n[ 14]) r[ 15]--; + // if (r[ 15] > n[ 15]) r[ 16]--; + // if (r[ 16] > n[ 16]) r[ 17]--; + // if (r[ 17] > n[ 17]) r[ 18]--; + // if (r[ 18] > n[ 18]) r[ 19]--; + // if (r[ 19] > n[ 19]) r[ 20]--; + // if (r[ 20] > n[ 20]) r[ 21]--; + // if (r[ 21] > n[ 21]) r[ 22]--; + // if (r[ 22] > n[ 22]) r[ 23]--; + // if (r[ 23] > n[ 23]) r[ 24]--; + // if (r[ 24] > n[ 24]) r[ 25]--; + // if (r[ 25] > n[ 25]) r[ 26]--; + // if (r[ 26] > n[ 26]) r[ 27]--; + // if (r[ 27] > n[ 27]) r[ 28]--; + // if (r[ 28] > n[ 28]) r[ 29]--; + // if (r[ 29] > n[ 29]) r[ 30]--; + // if (r[ 30] > n[ 30]) r[ 31]--; + // if (r[ 31] > n[ 31]) r[ 32]--; + // if (r[ 32] > n[ 32]) r[ 33]--; + // if (r[ 33] > n[ 33]) r[ 34]--; + // if (r[ 34] > n[ 34]) r[ 35]--; + // if (r[ 35] > n[ 35]) r[ 36]--; + // if (r[ 36] > n[ 36]) r[ 37]--; + // if (r[ 37] > n[ 37]) r[ 38]--; + // if (r[ 38] > n[ 38]) r[ 39]--; + // if (r[ 39] > n[ 39]) r[ 40]--; + // if (r[ 40] > n[ 40]) r[ 41]--; + // if (r[ 41] > n[ 41]) r[ 42]--; + // if (r[ 42] > n[ 42]) r[ 43]--; + // if (r[ 43] > n[ 43]) r[ 44]--; + // if (r[ 44] > n[ 44]) r[ 45]--; + // if (r[ 45] > n[ 45]) r[ 46]--; + // if (r[ 46] > n[ 46]) r[ 47]--; + // if (r[ 47] > n[ 47]) r[ 48]--; + // if (r[ 48] > n[ 48]) r[ 49]--; + // if (r[ 49] > n[ 49]) r[ 50]--; + // if (r[ 50] > n[ 50]) r[ 51]--; + // if (r[ 51] > n[ 51]) r[ 52]--; + // if (r[ 52] > n[ 52]) r[ 53]--; + // if (r[ 53] > n[ 53]) r[ 54]--; + // if (r[ 54] > n[ 54]) r[ 55]--; + // if (r[ 55] > n[ 55]) r[ 56]--; + // if 
(r[ 56] > n[ 56]) r[ 57]--; + // if (r[ 57] > n[ 57]) r[ 58]--; + // if (r[ 58] > n[ 58]) r[ 59]--; + // if (r[ 59] > n[ 59]) r[ 60]--; + // if (r[ 60] > n[ 60]) r[ 61]--; + // if (r[ 61] > n[ 61]) r[ 62]--; + // if (r[ 62] > n[ 62]) r[ 63]--; + // if (r[ 63] > n[ 63]) r[ 64]--; + // if (r[ 64] > n[ 64]) r[ 65]--; + // if (r[ 65] > n[ 65]) r[ 66]--; + // if (r[ 66] > n[ 66]) r[ 67]--; + // if (r[ 67] > n[ 67]) r[ 68]--; + // if (r[ 68] > n[ 68]) r[ 69]--; + // if (r[ 69] > n[ 69]) r[ 70]--; + // if (r[ 70] > n[ 70]) r[ 71]--; + // if (r[ 71] > n[ 71]) r[ 72]--; + // if (r[ 72] > n[ 72]) r[ 73]--; + // if (r[ 73] > n[ 73]) r[ 74]--; + // if (r[ 74] > n[ 74]) r[ 75]--; + // if (r[ 75] > n[ 75]) r[ 76]--; + // if (r[ 76] > n[ 76]) r[ 77]--; + // if (r[ 77] > n[ 77]) r[ 78]--; + // if (r[ 78] > n[ 78]) r[ 79]--; + // if (r[ 79] > n[ 79]) r[ 80]--; + // if (r[ 80] > n[ 80]) r[ 81]--; + // if (r[ 81] > n[ 81]) r[ 82]--; + // if (r[ 82] > n[ 82]) r[ 83]--; + // if (r[ 83] > n[ 83]) r[ 84]--; + // if (r[ 84] > n[ 84]) r[ 85]--; + // if (r[ 85] > n[ 85]) r[ 86]--; + // if (r[ 86] > n[ 86]) r[ 87]--; + // if (r[ 87] > n[ 87]) r[ 88]--; + // if (r[ 88] > n[ 88]) r[ 89]--; + // if (r[ 89] > n[ 89]) r[ 90]--; + // if (r[ 90] > n[ 90]) r[ 91]--; + // if (r[ 91] > n[ 91]) r[ 92]--; + // if (r[ 92] > n[ 92]) r[ 93]--; + // if (r[ 93] > n[ 93]) r[ 94]--; + // if (r[ 94] > n[ 94]) r[ 95]--; + // if (r[ 95] > n[ 95]) r[ 96]--; + // if (r[ 96] > n[ 96]) r[ 97]--; + // if (r[ 97] > n[ 97]) r[ 98]--; + // if (r[ 98] > n[ 98]) r[ 99]--; + // if (r[ 99] > n[ 99]) r[100]--; + // if (r[100] > n[100]) r[101]--; + // if (r[101] > n[101]) r[102]--; + // if (r[102] > n[102]) r[103]--; + // if (r[103] > n[103]) r[104]--; + // if (r[104] > n[104]) r[105]--; + // if (r[105] > n[105]) r[106]--; + // if (r[106] > n[106]) r[107]--; + // if (r[107] > n[107]) r[108]--; + // if (r[108] > n[108]) r[109]--; + // if (r[109] > n[109]) r[110]--; + // if (r[110] > n[110]) r[111]--; + // if (r[111] > n[111]) 
r[112]--; + // if (r[112] > n[112]) r[113]--; + // if (r[113] > n[113]) r[114]--; + // if (r[114] > n[114]) r[115]--; + // if (r[115] > n[115]) r[116]--; + // if (r[116] > n[116]) r[117]--; + // if (r[117] > n[117]) r[118]--; + // if (r[118] > n[118]) r[119]--; + // if (r[119] > n[119]) r[120]--; + // if (r[120] > n[120]) r[121]--; + // if (r[121] > n[121]) r[122]--; + // if (r[122] > n[122]) r[123]--; + // if (r[123] > n[123]) r[124]--; + // if (r[124] > n[124]) r[125]--; + // if (r[125] > n[125]) r[126]--; + // if (r[126] > n[126]) r[127]--; + + + // for (u32 i = 1; i < 128; i++) r[i] -= r[i - 1] > n[i - 1]; + + r[ 1] -= r[ 0] > n[ 0]; + r[ 2] -= r[ 1] > n[ 1]; + r[ 3] -= r[ 2] > n[ 2]; + r[ 4] -= r[ 3] > n[ 3]; + r[ 5] -= r[ 4] > n[ 4]; + r[ 6] -= r[ 5] > n[ 5]; + r[ 7] -= r[ 6] > n[ 6]; + r[ 8] -= r[ 7] > n[ 7]; + r[ 9] -= r[ 8] > n[ 8]; + r[ 10] -= r[ 9] > n[ 9]; + r[ 11] -= r[ 10] > n[ 10]; + r[ 12] -= r[ 11] > n[ 11]; + r[ 13] -= r[ 12] > n[ 12]; + r[ 14] -= r[ 13] > n[ 13]; + r[ 15] -= r[ 14] > n[ 14]; + r[ 16] -= r[ 15] > n[ 15]; + r[ 17] -= r[ 16] > n[ 16]; + r[ 18] -= r[ 17] > n[ 17]; + r[ 19] -= r[ 18] > n[ 18]; + r[ 20] -= r[ 19] > n[ 19]; + r[ 21] -= r[ 20] > n[ 20]; + r[ 22] -= r[ 21] > n[ 21]; + r[ 23] -= r[ 22] > n[ 22]; + r[ 24] -= r[ 23] > n[ 23]; + r[ 25] -= r[ 24] > n[ 24]; + r[ 26] -= r[ 25] > n[ 25]; + r[ 27] -= r[ 26] > n[ 26]; + r[ 28] -= r[ 27] > n[ 27]; + r[ 29] -= r[ 28] > n[ 28]; + r[ 30] -= r[ 29] > n[ 29]; + r[ 31] -= r[ 30] > n[ 30]; + r[ 32] -= r[ 31] > n[ 31]; + r[ 33] -= r[ 32] > n[ 32]; + r[ 34] -= r[ 33] > n[ 33]; + r[ 35] -= r[ 34] > n[ 34]; + r[ 36] -= r[ 35] > n[ 35]; + r[ 37] -= r[ 36] > n[ 36]; + r[ 38] -= r[ 37] > n[ 37]; + r[ 39] -= r[ 38] > n[ 38]; + r[ 40] -= r[ 39] > n[ 39]; + r[ 41] -= r[ 40] > n[ 40]; + r[ 42] -= r[ 41] > n[ 41]; + r[ 43] -= r[ 42] > n[ 42]; + r[ 44] -= r[ 43] > n[ 43]; + r[ 45] -= r[ 44] > n[ 44]; + r[ 46] -= r[ 45] > n[ 45]; + r[ 47] -= r[ 46] > n[ 46]; + r[ 48] -= r[ 47] > n[ 47]; + r[ 49] -= r[ 
48] > n[ 48]; + r[ 50] -= r[ 49] > n[ 49]; + r[ 51] -= r[ 50] > n[ 50]; + r[ 52] -= r[ 51] > n[ 51]; + r[ 53] -= r[ 52] > n[ 52]; + r[ 54] -= r[ 53] > n[ 53]; + r[ 55] -= r[ 54] > n[ 54]; + r[ 56] -= r[ 55] > n[ 55]; + r[ 57] -= r[ 56] > n[ 56]; + r[ 58] -= r[ 57] > n[ 57]; + r[ 59] -= r[ 58] > n[ 58]; + r[ 60] -= r[ 59] > n[ 59]; + r[ 61] -= r[ 60] > n[ 60]; + r[ 62] -= r[ 61] > n[ 61]; + r[ 63] -= r[ 62] > n[ 62]; + r[ 64] -= r[ 63] > n[ 63]; + r[ 65] -= r[ 64] > n[ 64]; + r[ 66] -= r[ 65] > n[ 65]; + r[ 67] -= r[ 66] > n[ 66]; + r[ 68] -= r[ 67] > n[ 67]; + r[ 69] -= r[ 68] > n[ 68]; + r[ 70] -= r[ 69] > n[ 69]; + r[ 71] -= r[ 70] > n[ 70]; + r[ 72] -= r[ 71] > n[ 71]; + r[ 73] -= r[ 72] > n[ 72]; + r[ 74] -= r[ 73] > n[ 73]; + r[ 75] -= r[ 74] > n[ 74]; + r[ 76] -= r[ 75] > n[ 75]; + r[ 77] -= r[ 76] > n[ 76]; + r[ 78] -= r[ 77] > n[ 77]; + r[ 79] -= r[ 78] > n[ 78]; + r[ 80] -= r[ 79] > n[ 79]; + r[ 81] -= r[ 80] > n[ 80]; + r[ 82] -= r[ 81] > n[ 81]; + r[ 83] -= r[ 82] > n[ 82]; + r[ 84] -= r[ 83] > n[ 83]; + r[ 85] -= r[ 84] > n[ 84]; + r[ 86] -= r[ 85] > n[ 85]; + r[ 87] -= r[ 86] > n[ 86]; + r[ 88] -= r[ 87] > n[ 87]; + r[ 89] -= r[ 88] > n[ 88]; + r[ 90] -= r[ 89] > n[ 89]; + r[ 91] -= r[ 90] > n[ 90]; + r[ 92] -= r[ 91] > n[ 91]; + r[ 93] -= r[ 92] > n[ 92]; + r[ 94] -= r[ 93] > n[ 93]; + r[ 95] -= r[ 94] > n[ 94]; + r[ 96] -= r[ 95] > n[ 95]; + r[ 97] -= r[ 96] > n[ 96]; + r[ 98] -= r[ 97] > n[ 97]; + r[ 99] -= r[ 98] > n[ 98]; + r[100] -= r[ 99] > n[ 99]; + r[101] -= r[100] > n[100]; + r[102] -= r[101] > n[101]; + r[103] -= r[102] > n[102]; + r[104] -= r[103] > n[103]; + r[105] -= r[104] > n[104]; + r[106] -= r[105] > n[105]; + r[107] -= r[106] > n[106]; + r[108] -= r[107] > n[107]; + r[109] -= r[108] > n[108]; + r[110] -= r[109] > n[109]; + r[111] -= r[110] > n[110]; + r[112] -= r[111] > n[111]; + r[113] -= r[112] > n[112]; + r[114] -= r[113] > n[113]; + r[115] -= r[114] > n[114]; + r[116] -= r[115] > n[115]; + r[117] -= r[116] > n[116]; + r[118] -= 
r[117] > n[117]; + r[119] -= r[118] > n[118]; + r[120] -= r[119] > n[119]; + r[121] -= r[120] > n[120]; + r[122] -= r[121] > n[121]; + r[123] -= r[122] > n[122]; + r[124] -= r[123] > n[123]; + r[125] -= r[124] > n[124]; + r[126] -= r[125] > n[125]; + r[127] -= r[126] > n[126]; + + // for (u32 i = 0; i < 128; i++) n[i] = r[i]; + + n[ 0] = r[ 0]; n[ 1] = r[ 1]; n[ 2] = r[ 2]; n[ 3] = r[ 3]; + n[ 4] = r[ 4]; n[ 5] = r[ 5]; n[ 6] = r[ 6]; n[ 7] = r[ 7]; + n[ 8] = r[ 8]; n[ 9] = r[ 9]; n[ 10] = r[ 10]; n[ 11] = r[ 11]; + n[ 12] = r[ 12]; n[ 13] = r[ 13]; n[ 14] = r[ 14]; n[ 15] = r[ 15]; + n[ 16] = r[ 16]; n[ 17] = r[ 17]; n[ 18] = r[ 18]; n[ 19] = r[ 19]; + n[ 20] = r[ 20]; n[ 21] = r[ 21]; n[ 22] = r[ 22]; n[ 23] = r[ 23]; + n[ 24] = r[ 24]; n[ 25] = r[ 25]; n[ 26] = r[ 26]; n[ 27] = r[ 27]; + n[ 28] = r[ 28]; n[ 29] = r[ 29]; n[ 30] = r[ 30]; n[ 31] = r[ 31]; + n[ 32] = r[ 32]; n[ 33] = r[ 33]; n[ 34] = r[ 34]; n[ 35] = r[ 35]; + n[ 36] = r[ 36]; n[ 37] = r[ 37]; n[ 38] = r[ 38]; n[ 39] = r[ 39]; + n[ 40] = r[ 40]; n[ 41] = r[ 41]; n[ 42] = r[ 42]; n[ 43] = r[ 43]; + n[ 44] = r[ 44]; n[ 45] = r[ 45]; n[ 46] = r[ 46]; n[ 47] = r[ 47]; + n[ 48] = r[ 48]; n[ 49] = r[ 49]; n[ 50] = r[ 50]; n[ 51] = r[ 51]; + n[ 52] = r[ 52]; n[ 53] = r[ 53]; n[ 54] = r[ 54]; n[ 55] = r[ 55]; + n[ 56] = r[ 56]; n[ 57] = r[ 57]; n[ 58] = r[ 58]; n[ 59] = r[ 59]; + n[ 60] = r[ 60]; n[ 61] = r[ 61]; n[ 62] = r[ 62]; n[ 63] = r[ 63]; + n[ 64] = r[ 64]; n[ 65] = r[ 65]; n[ 66] = r[ 66]; n[ 67] = r[ 67]; + n[ 68] = r[ 68]; n[ 69] = r[ 69]; n[ 70] = r[ 70]; n[ 71] = r[ 71]; + n[ 72] = r[ 72]; n[ 73] = r[ 73]; n[ 74] = r[ 74]; n[ 75] = r[ 75]; + n[ 76] = r[ 76]; n[ 77] = r[ 77]; n[ 78] = r[ 78]; n[ 79] = r[ 79]; + n[ 80] = r[ 80]; n[ 81] = r[ 81]; n[ 82] = r[ 82]; n[ 83] = r[ 83]; + n[ 84] = r[ 84]; n[ 85] = r[ 85]; n[ 86] = r[ 86]; n[ 87] = r[ 87]; + n[ 88] = r[ 88]; n[ 89] = r[ 89]; n[ 90] = r[ 90]; n[ 91] = r[ 91]; + n[ 92] = r[ 92]; n[ 93] = r[ 93]; n[ 94] = r[ 94]; n[ 95] = r[ 95]; + n[ 96] 
= r[ 96]; n[ 97] = r[ 97]; n[ 98] = r[ 98]; n[ 99] = r[ 99]; + n[100] = r[100]; n[101] = r[101]; n[102] = r[102]; n[103] = r[103]; + n[104] = r[104]; n[105] = r[105]; n[106] = r[106]; n[107] = r[107]; + n[108] = r[108]; n[109] = r[109]; n[110] = r[110]; n[111] = r[111]; + n[112] = r[112]; n[113] = r[113]; n[114] = r[114]; n[115] = r[115]; + n[116] = r[116]; n[117] = r[117]; n[118] = r[118]; n[119] = r[119]; + n[120] = r[120]; n[121] = r[121]; n[122] = r[122]; n[123] = r[123]; + n[124] = r[124]; n[125] = r[125]; n[126] = r[126]; n[127] = r[127]; + } +} + +// r[128], result, r = x * y +// x[ 64], multiplier (1st factor) +// y[ 64], multiplicand (2nd factor) + +DECLSPEC void mul (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *x, PRIVATE_AS const u32 *y) +{ + for (u32 i = 0; i < 64; i++) + { + u64 c = 0; // carry + + for (u32 j = 0; j < 64; j++) + { + const u32 idx = i + j; + + const u64 p = (u64) x[i] * y[j]; // main product + + const u32 u = p >> 32; + const u32 l = p >> 0; + + const u64 s = c + r[idx] + l; // sum + + c = (s >> 32) + u; + r[idx] = (s >> 0); + } + + r[i + 64] = (u32) c; + } +} + +// r[64], result, r = lower_bits (x * y), r = (x * y) & mask +// x[64], multiplier (1st factor) +// y[64], multiplicand (2nd factor) +// only compute lower bits (because we exploit our Montgomery "mask") + +DECLSPEC void mul_masked (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *x, PRIVATE_AS const u32 *y) +{ + for (u32 i = 0; i < 64; i++) + { + u64 c = 0; // carry + + // for (u32 j = 0; j < 64; j++) + // { + // const u32 idx = i + j; + // if (idx >= 63) break; + // ... 
+ // } + // or: + + for (u32 j = 0; j < (64 - i); j++) // our implied "mask" (only lower 256 bytes for idx) + { + const u32 idx = i + j; + + const u64 p = (u64) x[i] * y[j]; // product + + const u32 u = p >> 32; + const u32 l = p >> 0; + + const u64 s = c + r[idx] + l; // sum + + c = (s >> 32) + u; + r[idx] = (s >> 0); + } + } +} + +// x [ 64], multiplier, input and at the same time output, (x = (x * y) % m) +// y [ 64], multiplicand +// m [128], modulo +// fact[ 64], our m' (actually it is fact[65]) + +DECLSPEC void mul_mod (PRIVATE_AS u32 *x, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *fact) +{ + // 1st multiplication + // p = x * y + + u32 p[128] = { 0 }; + + mul (p, x, y); + + + + // 2nd multiplication + // t = ((p & mask) * fact) & mask, (we optimize this in mul_masked ()) + + u32 t[64] = { 0 }; // or use t[128] here, but not needed + + mul_masked (t, p, fact); + + + + // 3rd multiplication + // z = t * m + + u32 z[128] = { 0 }; + + mul (z, t, m); + + + // result: + + // t = (p + z) >> bits + + // u32 t[128] = { 0 }; // optimization, smaller t[] array: + // u32 t[ 64] = { 0 }; + + // for (u32 i = 0; i < 128; i++) t[i] = p[i] + z[i]; + + // t[ 0] = p[ 0] + z[ 0]; t[ 1] = p[ 1] + z[ 1]; + // t[ 2] = p[ 2] + z[ 2]; t[ 3] = p[ 3] + z[ 3]; + // t[ 4] = p[ 4] + z[ 4]; t[ 5] = p[ 5] + z[ 5]; + // t[ 6] = p[ 6] + z[ 6]; t[ 7] = p[ 7] + z[ 7]; + // t[ 8] = p[ 8] + z[ 8]; t[ 9] = p[ 9] + z[ 9]; + // t[ 10] = p[ 10] + z[ 10]; t[ 11] = p[ 11] + z[ 11]; + // t[ 12] = p[ 12] + z[ 12]; t[ 13] = p[ 13] + z[ 13]; + // t[ 14] = p[ 14] + z[ 14]; t[ 15] = p[ 15] + z[ 15]; + // t[ 16] = p[ 16] + z[ 16]; t[ 17] = p[ 17] + z[ 17]; + // t[ 18] = p[ 18] + z[ 18]; t[ 19] = p[ 19] + z[ 19]; + // t[ 20] = p[ 20] + z[ 20]; t[ 21] = p[ 21] + z[ 21]; + // t[ 22] = p[ 22] + z[ 22]; t[ 23] = p[ 23] + z[ 23]; + // t[ 24] = p[ 24] + z[ 24]; t[ 25] = p[ 25] + z[ 25]; + // t[ 26] = p[ 26] + z[ 26]; t[ 27] = p[ 27] + z[ 27]; + // t[ 28] = p[ 28] + z[ 28]; t[ 29] = p[ 
29] + z[ 29]; + // t[ 30] = p[ 30] + z[ 30]; t[ 31] = p[ 31] + z[ 31]; + // t[ 32] = p[ 32] + z[ 32]; t[ 33] = p[ 33] + z[ 33]; + // t[ 34] = p[ 34] + z[ 34]; t[ 35] = p[ 35] + z[ 35]; + // t[ 36] = p[ 36] + z[ 36]; t[ 37] = p[ 37] + z[ 37]; + // t[ 38] = p[ 38] + z[ 38]; t[ 39] = p[ 39] + z[ 39]; + // t[ 40] = p[ 40] + z[ 40]; t[ 41] = p[ 41] + z[ 41]; + // t[ 42] = p[ 42] + z[ 42]; t[ 43] = p[ 43] + z[ 43]; + // t[ 44] = p[ 44] + z[ 44]; t[ 45] = p[ 45] + z[ 45]; + // t[ 46] = p[ 46] + z[ 46]; t[ 47] = p[ 47] + z[ 47]; + // t[ 48] = p[ 48] + z[ 48]; t[ 49] = p[ 49] + z[ 49]; + // t[ 50] = p[ 50] + z[ 50]; t[ 51] = p[ 51] + z[ 51]; + // t[ 52] = p[ 52] + z[ 52]; t[ 53] = p[ 53] + z[ 53]; + // t[ 54] = p[ 54] + z[ 54]; t[ 55] = p[ 55] + z[ 55]; + // t[ 56] = p[ 56] + z[ 56]; t[ 57] = p[ 57] + z[ 57]; + // t[ 58] = p[ 58] + z[ 58]; t[ 59] = p[ 59] + z[ 59]; + // t[ 60] = p[ 60] + z[ 60]; t[ 61] = p[ 61] + z[ 61]; + // t[ 62] = p[ 62] + z[ 62]; t[ 63] = p[ 63] + z[ 63]; + // t[ 64] = p[ 64] + z[ 64]; t[ 65] = p[ 65] + z[ 65]; + // t[ 66] = p[ 66] + z[ 66]; t[ 67] = p[ 67] + z[ 67]; + // t[ 68] = p[ 68] + z[ 68]; t[ 69] = p[ 69] + z[ 69]; + // t[ 70] = p[ 70] + z[ 70]; t[ 71] = p[ 71] + z[ 71]; + // t[ 72] = p[ 72] + z[ 72]; t[ 73] = p[ 73] + z[ 73]; + // t[ 74] = p[ 74] + z[ 74]; t[ 75] = p[ 75] + z[ 75]; + // t[ 76] = p[ 76] + z[ 76]; t[ 77] = p[ 77] + z[ 77]; + // t[ 78] = p[ 78] + z[ 78]; t[ 79] = p[ 79] + z[ 79]; + // t[ 80] = p[ 80] + z[ 80]; t[ 81] = p[ 81] + z[ 81]; + // t[ 82] = p[ 82] + z[ 82]; t[ 83] = p[ 83] + z[ 83]; + // t[ 84] = p[ 84] + z[ 84]; t[ 85] = p[ 85] + z[ 85]; + // t[ 86] = p[ 86] + z[ 86]; t[ 87] = p[ 87] + z[ 87]; + // t[ 88] = p[ 88] + z[ 88]; t[ 89] = p[ 89] + z[ 89]; + // t[ 90] = p[ 90] + z[ 90]; t[ 91] = p[ 91] + z[ 91]; + // t[ 92] = p[ 92] + z[ 92]; t[ 93] = p[ 93] + z[ 93]; + // t[ 94] = p[ 94] + z[ 94]; t[ 95] = p[ 95] + z[ 95]; + // t[ 96] = p[ 96] + z[ 96]; t[ 97] = p[ 97] + z[ 97]; + // t[ 98] = p[ 98] + z[ 98]; t[ 99] = p[ 99] 
+ z[ 99]; + // t[100] = p[100] + z[100]; t[101] = p[101] + z[101]; + // t[102] = p[102] + z[102]; t[103] = p[103] + z[103]; + // t[104] = p[104] + z[104]; t[105] = p[105] + z[105]; + // t[106] = p[106] + z[106]; t[107] = p[107] + z[107]; + // t[108] = p[108] + z[108]; t[109] = p[109] + z[109]; + // t[110] = p[110] + z[110]; t[111] = p[111] + z[111]; + // t[112] = p[112] + z[112]; t[113] = p[113] + z[113]; + // t[114] = p[114] + z[114]; t[115] = p[115] + z[115]; + // t[116] = p[116] + z[116]; t[117] = p[117] + z[117]; + // t[118] = p[118] + z[118]; t[119] = p[119] + z[119]; + // t[120] = p[120] + z[120]; t[121] = p[121] + z[121]; + // t[122] = p[122] + z[122]; t[123] = p[123] + z[123]; + // t[124] = p[124] + z[124]; t[125] = p[125] + z[125]; + // t[126] = p[126] + z[126]; t[127] = p[127] + z[127]; + + // optimization, we shift it down already here: + + // for (u32 i = 0; i < 64; i++) t[i] = p[i + 64] + z[i + 64]; + + t[ 0] = p[ 64] + z[ 64]; t[ 1] = p[ 65] + z[ 65]; + t[ 2] = p[ 66] + z[ 66]; t[ 3] = p[ 67] + z[ 67]; + t[ 4] = p[ 68] + z[ 68]; t[ 5] = p[ 69] + z[ 69]; + t[ 6] = p[ 70] + z[ 70]; t[ 7] = p[ 71] + z[ 71]; + t[ 8] = p[ 72] + z[ 72]; t[ 9] = p[ 73] + z[ 73]; + t[10] = p[ 74] + z[ 74]; t[11] = p[ 75] + z[ 75]; + t[12] = p[ 76] + z[ 76]; t[13] = p[ 77] + z[ 77]; + t[14] = p[ 78] + z[ 78]; t[15] = p[ 79] + z[ 79]; + t[16] = p[ 80] + z[ 80]; t[17] = p[ 81] + z[ 81]; + t[18] = p[ 82] + z[ 82]; t[19] = p[ 83] + z[ 83]; + t[20] = p[ 84] + z[ 84]; t[21] = p[ 85] + z[ 85]; + t[22] = p[ 86] + z[ 86]; t[23] = p[ 87] + z[ 87]; + t[24] = p[ 88] + z[ 88]; t[25] = p[ 89] + z[ 89]; + t[26] = p[ 90] + z[ 90]; t[27] = p[ 91] + z[ 91]; + t[28] = p[ 92] + z[ 92]; t[29] = p[ 93] + z[ 93]; + t[30] = p[ 94] + z[ 94]; t[31] = p[ 95] + z[ 95]; + t[32] = p[ 96] + z[ 96]; t[33] = p[ 97] + z[ 97]; + t[34] = p[ 98] + z[ 98]; t[35] = p[ 99] + z[ 99]; + t[36] = p[100] + z[100]; t[37] = p[101] + z[101]; + t[38] = p[102] + z[102]; t[39] = p[103] + z[103]; + t[40] = p[104] + z[104]; 
t[41] = p[105] + z[105]; + t[42] = p[106] + z[106]; t[43] = p[107] + z[107]; + t[44] = p[108] + z[108]; t[45] = p[109] + z[109]; + t[46] = p[110] + z[110]; t[47] = p[111] + z[111]; + t[48] = p[112] + z[112]; t[49] = p[113] + z[113]; + t[50] = p[114] + z[114]; t[51] = p[115] + z[115]; + t[52] = p[116] + z[116]; t[53] = p[117] + z[117]; + t[54] = p[118] + z[118]; t[55] = p[119] + z[119]; + t[56] = p[120] + z[120]; t[57] = p[121] + z[121]; + t[58] = p[122] + z[122]; t[59] = p[123] + z[123]; + t[60] = p[124] + z[124]; t[61] = p[125] + z[125]; + t[62] = p[126] + z[126]; t[63] = p[127] + z[127]; + + // take care of the carry for the addition above (i.e check for "overflows") + + // for (u32 i = 1; i < 128; i++) t[i] += t[i - 1] < p[i - 1]; + + // t[ 1] += t[ 0] < p[ 0]; + // t[ 2] += t[ 1] < p[ 1]; + // t[ 3] += t[ 2] < p[ 2]; + // t[ 4] += t[ 3] < p[ 3]; + // t[ 5] += t[ 4] < p[ 4]; + // t[ 6] += t[ 5] < p[ 5]; + // t[ 7] += t[ 6] < p[ 6]; + // t[ 8] += t[ 7] < p[ 7]; + // t[ 9] += t[ 8] < p[ 8]; + // t[ 10] += t[ 9] < p[ 9]; + // t[ 11] += t[ 10] < p[ 10]; + // t[ 12] += t[ 11] < p[ 11]; + // t[ 13] += t[ 12] < p[ 12]; + // t[ 14] += t[ 13] < p[ 13]; + // t[ 15] += t[ 14] < p[ 14]; + // t[ 16] += t[ 15] < p[ 15]; + // t[ 17] += t[ 16] < p[ 16]; + // t[ 18] += t[ 17] < p[ 17]; + // t[ 19] += t[ 18] < p[ 18]; + // t[ 20] += t[ 19] < p[ 19]; + // t[ 21] += t[ 20] < p[ 20]; + // t[ 22] += t[ 21] < p[ 21]; + // t[ 23] += t[ 22] < p[ 22]; + // t[ 24] += t[ 23] < p[ 23]; + // t[ 25] += t[ 24] < p[ 24]; + // t[ 26] += t[ 25] < p[ 25]; + // t[ 27] += t[ 26] < p[ 26]; + // t[ 28] += t[ 27] < p[ 27]; + // t[ 29] += t[ 28] < p[ 28]; + // t[ 30] += t[ 29] < p[ 29]; + // t[ 31] += t[ 30] < p[ 30]; + // t[ 32] += t[ 31] < p[ 31]; + // t[ 33] += t[ 32] < p[ 32]; + // t[ 34] += t[ 33] < p[ 33]; + // t[ 35] += t[ 34] < p[ 34]; + // t[ 36] += t[ 35] < p[ 35]; + // t[ 37] += t[ 36] < p[ 36]; + // t[ 38] += t[ 37] < p[ 37]; + // t[ 39] += t[ 38] < p[ 38]; + // t[ 40] += t[ 39] < p[ 39]; + 
// t[ 41] += t[ 40] < p[ 40]; + // t[ 42] += t[ 41] < p[ 41]; + // t[ 43] += t[ 42] < p[ 42]; + // t[ 44] += t[ 43] < p[ 43]; + // t[ 45] += t[ 44] < p[ 44]; + // t[ 46] += t[ 45] < p[ 45]; + // t[ 47] += t[ 46] < p[ 46]; + // t[ 48] += t[ 47] < p[ 47]; + // t[ 49] += t[ 48] < p[ 48]; + // t[ 50] += t[ 49] < p[ 49]; + // t[ 51] += t[ 50] < p[ 50]; + // t[ 52] += t[ 51] < p[ 51]; + // t[ 53] += t[ 52] < p[ 52]; + // t[ 54] += t[ 53] < p[ 53]; + // t[ 55] += t[ 54] < p[ 54]; + // t[ 56] += t[ 55] < p[ 55]; + // t[ 57] += t[ 56] < p[ 56]; + // t[ 58] += t[ 57] < p[ 57]; + // t[ 59] += t[ 58] < p[ 58]; + // t[ 60] += t[ 59] < p[ 59]; + // t[ 61] += t[ 60] < p[ 60]; + // t[ 62] += t[ 61] < p[ 61]; + // t[ 63] += t[ 62] < p[ 62]; + + // t[ 64] += t; // we know that it overflows (due to the way Montgomery works) + // t[ 65] += t[ 64] < p[ 64]; + // t[ 66] += t[ 65] < p[ 65]; + // t[ 67] += t[ 66] < p[ 66]; + // t[ 68] += t[ 67] < p[ 67]; + // t[ 69] += t[ 68] < p[ 68]; + // t[ 70] += t[ 69] < p[ 69]; + // t[ 71] += t[ 70] < p[ 70]; + // t[ 72] += t[ 71] < p[ 71]; + // t[ 73] += t[ 72] < p[ 72]; + // t[ 74] += t[ 73] < p[ 73]; + // t[ 75] += t[ 74] < p[ 74]; + // t[ 76] += t[ 75] < p[ 75]; + // t[ 77] += t[ 76] < p[ 76]; + // t[ 78] += t[ 77] < p[ 77]; + // t[ 79] += t[ 78] < p[ 78]; + // t[ 80] += t[ 79] < p[ 79]; + // t[ 81] += t[ 80] < p[ 80]; + // t[ 82] += t[ 81] < p[ 81]; + // t[ 83] += t[ 82] < p[ 82]; + // t[ 84] += t[ 83] < p[ 83]; + // t[ 85] += t[ 84] < p[ 84]; + // t[ 86] += t[ 85] < p[ 85]; + // t[ 87] += t[ 86] < p[ 86]; + // t[ 88] += t[ 87] < p[ 87]; + // t[ 89] += t[ 88] < p[ 88]; + // t[ 90] += t[ 89] < p[ 89]; + // t[ 91] += t[ 90] < p[ 90]; + // t[ 92] += t[ 91] < p[ 91]; + // t[ 93] += t[ 92] < p[ 92]; + // t[ 94] += t[ 93] < p[ 93]; + // t[ 95] += t[ 94] < p[ 94]; + // t[ 96] += t[ 95] < p[ 95]; + // t[ 97] += t[ 96] < p[ 96]; + // t[ 98] += t[ 97] < p[ 97]; + // t[ 99] += t[ 98] < p[ 98]; + // t[100] += t[ 99] < p[ 99]; + // t[101] += t[100] < 
p[100]; + // t[102] += t[101] < p[101]; + // t[103] += t[102] < p[102]; + // t[104] += t[103] < p[103]; + // t[105] += t[104] < p[104]; + // t[106] += t[105] < p[105]; + // t[107] += t[106] < p[106]; + // t[108] += t[107] < p[107]; + // t[109] += t[108] < p[108]; + // t[110] += t[109] < p[109]; + // t[111] += t[110] < p[110]; + // t[112] += t[111] < p[111]; + // t[113] += t[112] < p[112]; + // t[114] += t[113] < p[113]; + // t[115] += t[114] < p[114]; + // t[116] += t[115] < p[115]; + // t[117] += t[116] < p[116]; + // t[118] += t[117] < p[117]; + // t[119] += t[118] < p[118]; + // t[120] += t[119] < p[119]; + // t[121] += t[120] < p[120]; + // t[122] += t[121] < p[121]; + // t[123] += t[122] < p[122]; + // t[124] += t[123] < p[123]; + // t[125] += t[124] < p[124]; + // t[126] += t[125] < p[125]; + // t[127] += t[126] < p[126]; + + t[ 0] += 1; // we know that it overflows (due to the way Montgomery works) + + + // for (u32 i = 1; i < 64; i++) t[i] += t[i - 1] < p[i + 63]; + + t[ 1] += t[ 0] < p[ 64]; + t[ 2] += t[ 1] < p[ 65]; + t[ 3] += t[ 2] < p[ 66]; + t[ 4] += t[ 3] < p[ 67]; + t[ 5] += t[ 4] < p[ 68]; + t[ 6] += t[ 5] < p[ 69]; + t[ 7] += t[ 6] < p[ 70]; + t[ 8] += t[ 7] < p[ 71]; + t[ 9] += t[ 8] < p[ 72]; + t[10] += t[ 9] < p[ 73]; + t[11] += t[10] < p[ 74]; + t[12] += t[11] < p[ 75]; + t[13] += t[12] < p[ 76]; + t[14] += t[13] < p[ 77]; + t[15] += t[14] < p[ 78]; + t[16] += t[15] < p[ 79]; + t[17] += t[16] < p[ 80]; + t[18] += t[17] < p[ 81]; + t[19] += t[18] < p[ 82]; + t[20] += t[19] < p[ 83]; + t[21] += t[20] < p[ 84]; + t[22] += t[21] < p[ 85]; + t[23] += t[22] < p[ 86]; + t[24] += t[23] < p[ 87]; + t[25] += t[24] < p[ 88]; + t[26] += t[25] < p[ 89]; + t[27] += t[26] < p[ 90]; + t[28] += t[27] < p[ 91]; + t[29] += t[28] < p[ 92]; + t[30] += t[29] < p[ 93]; + t[31] += t[30] < p[ 94]; + t[32] += t[31] < p[ 95]; + t[33] += t[32] < p[ 96]; + t[34] += t[33] < p[ 97]; + t[35] += t[34] < p[ 98]; + t[36] += t[35] < p[ 99]; + t[37] += t[36] < p[100]; + t[38] += 
t[37] < p[101]; + t[39] += t[38] < p[102]; + t[40] += t[39] < p[103]; + t[41] += t[40] < p[104]; + t[42] += t[41] < p[105]; + t[43] += t[42] < p[106]; + t[44] += t[43] < p[107]; + t[45] += t[44] < p[108]; + t[46] += t[45] < p[109]; + t[47] += t[46] < p[110]; + t[48] += t[47] < p[111]; + t[49] += t[48] < p[112]; + t[50] += t[49] < p[113]; + t[51] += t[50] < p[114]; + t[52] += t[51] < p[115]; + t[53] += t[52] < p[116]; + t[54] += t[53] < p[117]; + t[55] += t[54] < p[118]; + t[56] += t[55] < p[119]; + t[57] += t[56] < p[120]; + t[58] += t[57] < p[121]; + t[59] += t[58] < p[122]; + t[60] += t[59] < p[123]; + t[61] += t[60] < p[124]; + t[62] += t[61] < p[125]; + t[63] += t[62] < p[126]; + + // VERY IMPORTANT: if we have an overflow at the highest u32, we need to do the substraction + + const u32 c = t[63] < p[127]; // carry / overflow + + + // x = t + // (x is actually already our result, but it's possible that we need to do the modulo/substract) + + // for (u32 i = 0; i < 64; i++) x[i] = t[i]; + + x[ 0] = t[ 0]; x[ 1] = t[ 1]; x[ 2] = t[ 2]; x[ 3] = t[ 3]; + x[ 4] = t[ 4]; x[ 5] = t[ 5]; x[ 6] = t[ 6]; x[ 7] = t[ 7]; + x[ 8] = t[ 8]; x[ 9] = t[ 9]; x[10] = t[10]; x[11] = t[11]; + x[12] = t[12]; x[13] = t[13]; x[14] = t[14]; x[15] = t[15]; + x[16] = t[16]; x[17] = t[17]; x[18] = t[18]; x[19] = t[19]; + x[20] = t[20]; x[21] = t[21]; x[22] = t[22]; x[23] = t[23]; + x[24] = t[24]; x[25] = t[25]; x[26] = t[26]; x[27] = t[27]; + x[28] = t[28]; x[29] = t[29]; x[30] = t[30]; x[31] = t[31]; + x[32] = t[32]; x[33] = t[33]; x[34] = t[34]; x[35] = t[35]; + x[36] = t[36]; x[37] = t[37]; x[38] = t[38]; x[39] = t[39]; + x[40] = t[40]; x[41] = t[41]; x[42] = t[42]; x[43] = t[43]; + x[44] = t[44]; x[45] = t[45]; x[46] = t[46]; x[47] = t[47]; + x[48] = t[48]; x[49] = t[49]; x[50] = t[50]; x[51] = t[51]; + x[52] = t[52]; x[53] = t[53]; x[54] = t[54]; x[55] = t[55]; + x[56] = t[56]; x[57] = t[57]; x[58] = t[58]; x[59] = t[59]; + x[60] = t[60]; x[61] = t[61]; x[62] = t[62]; x[63] = t[63]; 
+ + // if (x >= m) x -= m + // (check if we need to substract, or do a "fast modulo") + + u32 sub_needed = 1; + + if (c == 0) + { + for (int i = 63; i >= 0; i--) + { + if (x[i] < m[i]) // FAIL case + { + sub_needed = 0; + + break; + } + else + if (x[i] > m[i]) // definitely a SUCCESS case (otherwise continue to check if equal) + { + break; + } + } + } + + if (sub_needed == 1) + { + // x -= m + + // for (u32 i = 0; i < 64; i++) x[i] -= m[i]; + + x[ 0] -= m[ 0]; x[ 1] -= m[ 1]; x[ 2] -= m[ 2]; x[ 3] -= m[ 3]; + x[ 4] -= m[ 4]; x[ 5] -= m[ 5]; x[ 6] -= m[ 6]; x[ 7] -= m[ 7]; + x[ 8] -= m[ 8]; x[ 9] -= m[ 9]; x[10] -= m[10]; x[11] -= m[11]; + x[12] -= m[12]; x[13] -= m[13]; x[14] -= m[14]; x[15] -= m[15]; + x[16] -= m[16]; x[17] -= m[17]; x[18] -= m[18]; x[19] -= m[19]; + x[20] -= m[20]; x[21] -= m[21]; x[22] -= m[22]; x[23] -= m[23]; + x[24] -= m[24]; x[25] -= m[25]; x[26] -= m[26]; x[27] -= m[27]; + x[28] -= m[28]; x[29] -= m[29]; x[30] -= m[30]; x[31] -= m[31]; + x[32] -= m[32]; x[33] -= m[33]; x[34] -= m[34]; x[35] -= m[35]; + x[36] -= m[36]; x[37] -= m[37]; x[38] -= m[38]; x[39] -= m[39]; + x[40] -= m[40]; x[41] -= m[41]; x[42] -= m[42]; x[43] -= m[43]; + x[44] -= m[44]; x[45] -= m[45]; x[46] -= m[46]; x[47] -= m[47]; + x[48] -= m[48]; x[49] -= m[49]; x[50] -= m[50]; x[51] -= m[51]; + x[52] -= m[52]; x[53] -= m[53]; x[54] -= m[54]; x[55] -= m[55]; + x[56] -= m[56]; x[57] -= m[57]; x[58] -= m[58]; x[59] -= m[59]; + x[60] -= m[60]; x[61] -= m[61]; x[62] -= m[62]; x[63] -= m[63]; + + // take care of the "borrow": + + // for (u32 i = 0; i < 63; i++) if (x[i] > t[i]) x[i + 1]--; + + // if (x[ 0] > t[ 0]) x[ 1]--; + // if (x[ 1] > t[ 1]) x[ 2]--; + // if (x[ 2] > t[ 2]) x[ 3]--; + // if (x[ 3] > t[ 3]) x[ 4]--; + // if (x[ 4] > t[ 4]) x[ 5]--; + // if (x[ 5] > t[ 5]) x[ 6]--; + // if (x[ 6] > t[ 6]) x[ 7]--; + // if (x[ 7] > t[ 7]) x[ 8]--; + // if (x[ 8] > t[ 8]) x[ 9]--; + // if (x[ 9] > t[ 9]) x[ 10]--; + // if (x[ 10] > t[ 10]) x[ 11]--; + // if (x[ 11] > t[ 11]) x[ 
12]--; + // if (x[ 12] > t[ 12]) x[ 13]--; + // if (x[ 13] > t[ 13]) x[ 14]--; + // if (x[ 14] > t[ 14]) x[ 15]--; + // if (x[ 15] > t[ 15]) x[ 16]--; + // if (x[ 16] > t[ 16]) x[ 17]--; + // if (x[ 17] > t[ 17]) x[ 18]--; + // if (x[ 18] > t[ 18]) x[ 19]--; + // if (x[ 19] > t[ 19]) x[ 20]--; + // if (x[ 20] > t[ 20]) x[ 21]--; + // if (x[ 21] > t[ 21]) x[ 22]--; + // if (x[ 22] > t[ 22]) x[ 23]--; + // if (x[ 23] > t[ 23]) x[ 24]--; + // if (x[ 24] > t[ 24]) x[ 25]--; + // if (x[ 25] > t[ 25]) x[ 26]--; + // if (x[ 26] > t[ 26]) x[ 27]--; + // if (x[ 27] > t[ 27]) x[ 28]--; + // if (x[ 28] > t[ 28]) x[ 29]--; + // if (x[ 29] > t[ 29]) x[ 30]--; + // if (x[ 30] > t[ 30]) x[ 31]--; + // if (x[ 31] > t[ 31]) x[ 32]--; + // if (x[ 32] > t[ 32]) x[ 33]--; + // if (x[ 33] > t[ 33]) x[ 34]--; + // if (x[ 34] > t[ 34]) x[ 35]--; + // if (x[ 35] > t[ 35]) x[ 36]--; + // if (x[ 36] > t[ 36]) x[ 37]--; + // if (x[ 37] > t[ 37]) x[ 38]--; + // if (x[ 38] > t[ 38]) x[ 39]--; + // if (x[ 39] > t[ 39]) x[ 40]--; + // if (x[ 40] > t[ 40]) x[ 41]--; + // if (x[ 41] > t[ 41]) x[ 42]--; + // if (x[ 42] > t[ 42]) x[ 43]--; + // if (x[ 43] > t[ 43]) x[ 44]--; + // if (x[ 44] > t[ 44]) x[ 45]--; + // if (x[ 45] > t[ 45]) x[ 46]--; + // if (x[ 46] > t[ 46]) x[ 47]--; + // if (x[ 47] > t[ 47]) x[ 48]--; + // if (x[ 48] > t[ 48]) x[ 49]--; + // if (x[ 49] > t[ 49]) x[ 50]--; + // if (x[ 50] > t[ 50]) x[ 51]--; + // if (x[ 51] > t[ 51]) x[ 52]--; + // if (x[ 52] > t[ 52]) x[ 53]--; + // if (x[ 53] > t[ 53]) x[ 54]--; + // if (x[ 54] > t[ 54]) x[ 55]--; + // if (x[ 55] > t[ 55]) x[ 56]--; + // if (x[ 56] > t[ 56]) x[ 57]--; + // if (x[ 57] > t[ 57]) x[ 58]--; + // if (x[ 58] > t[ 58]) x[ 59]--; + // if (x[ 59] > t[ 59]) x[ 60]--; + // if (x[ 60] > t[ 60]) x[ 61]--; + // if (x[ 61] > t[ 61]) x[ 62]--; + // if (x[ 62] > t[ 62]) x[ 63]--; + + // for (u32 i = 1; i < 64; i++) x[i] -= x[i - 1] > t[i - 1]; + + x[ 1] -= x[ 0] > t[ 0]; + x[ 2] -= x[ 1] > t[ 1]; + x[ 3] -= x[ 2] > t[ 2]; + x[ 4] -= 
x[ 3] > t[ 3]; + x[ 5] -= x[ 4] > t[ 4]; + x[ 6] -= x[ 5] > t[ 5]; + x[ 7] -= x[ 6] > t[ 6]; + x[ 8] -= x[ 7] > t[ 7]; + x[ 9] -= x[ 8] > t[ 8]; + x[10] -= x[ 9] > t[ 9]; + x[11] -= x[10] > t[10]; + x[12] -= x[11] > t[11]; + x[13] -= x[12] > t[12]; + x[14] -= x[13] > t[13]; + x[15] -= x[14] > t[14]; + x[16] -= x[15] > t[15]; + x[17] -= x[16] > t[16]; + x[18] -= x[17] > t[17]; + x[19] -= x[18] > t[18]; + x[20] -= x[19] > t[19]; + x[21] -= x[20] > t[20]; + x[22] -= x[21] > t[21]; + x[23] -= x[22] > t[22]; + x[24] -= x[23] > t[23]; + x[25] -= x[24] > t[24]; + x[26] -= x[25] > t[25]; + x[27] -= x[26] > t[26]; + x[28] -= x[27] > t[27]; + x[29] -= x[28] > t[28]; + x[30] -= x[29] > t[29]; + x[31] -= x[30] > t[30]; + x[32] -= x[31] > t[31]; + x[33] -= x[32] > t[32]; + x[34] -= x[33] > t[33]; + x[35] -= x[34] > t[34]; + x[36] -= x[35] > t[35]; + x[37] -= x[36] > t[36]; + x[38] -= x[37] > t[37]; + x[39] -= x[38] > t[38]; + x[40] -= x[39] > t[39]; + x[41] -= x[40] > t[40]; + x[42] -= x[41] > t[41]; + x[43] -= x[42] > t[42]; + x[44] -= x[43] > t[43]; + x[45] -= x[44] > t[44]; + x[46] -= x[45] > t[45]; + x[47] -= x[46] > t[46]; + x[48] -= x[47] > t[47]; + x[49] -= x[48] > t[48]; + x[50] -= x[49] > t[49]; + x[51] -= x[50] > t[50]; + x[52] -= x[51] > t[51]; + x[53] -= x[52] > t[52]; + x[54] -= x[53] > t[53]; + x[55] -= x[54] > t[54]; + x[56] -= x[55] > t[55]; + x[57] -= x[56] > t[56]; + x[58] -= x[57] > t[57]; + x[59] -= x[58] > t[58]; + x[60] -= x[59] > t[59]; + x[61] -= x[60] > t[60]; + x[62] -= x[61] > t[61]; + x[63] -= x[62] > t[62]; + } +} + +// r [ 64], input (reducer % m) and at the same time output, r = (y ^ x) % m +// u32 b_pre[10240], precomputed base powers (160 powers for each bit of SHA1 * 64 bytes) +// y [ 5], exponent (in standard form, not Montgomery) +// m [ 128], modulo +// fact [ 64], our m' (actually it is fact[65]) + +DECLSPEC void pow_mod_precomp_g (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *b_pre, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS 
const u32 *fact) +{ + for (u32 i = 0; i < 160; i++) + { + const u32 div = i / 32; + const u32 mod = i % 32; // & 31 + + const u32 bit_set = (y[div] >> mod) & 1; + + if (bit_set == 1) mul_mod (r, b_pre + i * 64, m, fact); + } +} + +// r [ 64], input (reducer % m) and at the same time output, r = (y ^ x) % m +// x [ 64], our main Montgomery number (base, generator) +// y [ 5], our SHA1 vector, exponent (in standard form, not Montgomery) +// m [128], modulo +// fact[ 64], our m' (actually it is fact[65]) +// we replaced this with pow_mod_precomp_g () + +DECLSPEC void pow_mod (PRIVATE_AS u32 *r, PRIVATE_AS u32 *x, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *fact) +{ + u32 highest_bit = 0; + + for (highest_bit = 159; highest_bit >= 1; highest_bit--) + { + const u32 div = highest_bit / 32; + const u32 mod = highest_bit % 32; // & 31 + + // if we find a bit that is set to 1: + + const u32 bit_set = (y[div] >> mod) & 1; + + if (bit_set == 1) break; + } + + for (u32 i = 0; i <= highest_bit; i++) + { + const u32 div = i / 32; + const u32 mod = i % 32; // & 31 + + const u32 bit_set = (y[div] >> mod) & 1; + + if (bit_set == 1) mul_mod (r, x, m, fact); + + mul_mod (x, x, m, fact); + } +} + +// u [ 64], rinv (r') output +// v [ 65], fact (m') output, we only set the first 256 bytes, but we need 1 extra u32 (or byte) +// m [128], modulo + +DECLSPEC void simple_euclidean_gcd (PRIVATE_AS u32 *u, PRIVATE_AS u32 *v, PRIVATE_AS const u32 *m) +{ + // outputs u and v must be initialized to ALL zeros + + u[0] = 1; + // v[0] = 0; + + // we skip this next step, since we know "r" in advance: + // u32 r[65] = { 0 }; + + // r[64] = 1; + // + // // t = r: + // + // u32 t[65] = { 0 }; + // + // for (u32 i = 0; i < 65; i++) t[i] = r[i]; + // + // t[ 0] = r[ 0]; t[ 1] = r[ 1]; t[ 2] = r[ 2]; t[ 3] = r[ 3]; + // t[ 4] = r[ 4]; t[ 5] = r[ 5]; t[ 6] = r[ 6]; t[ 7] = r[ 7]; + // t[ 8] = r[ 8]; t[ 9] = r[ 9]; t[10] = r[10]; t[11] = r[11]; + // t[12] = r[12]; t[13] = r[13]; 
t[14] = r[14]; t[15] = r[15]; + // t[16] = r[16]; t[17] = r[17]; t[18] = r[18]; t[19] = r[19]; + // t[20] = r[20]; t[21] = r[21]; t[22] = r[22]; t[23] = r[23]; + // t[24] = r[24]; t[25] = r[25]; t[26] = r[26]; t[27] = r[27]; + // t[28] = r[28]; t[29] = r[29]; t[30] = r[30]; t[31] = r[31]; + // t[32] = r[32]; t[33] = r[33]; t[34] = r[34]; t[35] = r[35]; + // t[36] = r[36]; t[37] = r[37]; t[38] = r[38]; t[39] = r[39]; + // t[40] = r[40]; t[41] = r[41]; t[42] = r[42]; t[43] = r[43]; + // t[44] = r[44]; t[45] = r[45]; t[46] = r[46]; t[47] = r[47]; + // t[48] = r[48]; t[49] = r[49]; t[50] = r[50]; t[51] = r[51]; + // t[52] = r[52]; t[53] = r[53]; t[54] = r[54]; t[55] = r[55]; + // t[56] = r[56]; t[57] = r[57]; t[58] = r[58]; t[59] = r[59]; + // t[60] = r[60]; t[61] = r[61]; t[62] = r[62]; t[63] = r[63]; + // t[64] = r[64]; + // + // while ((t[ 0] != 0) || (t[ 1] != 0) || (t[ 2] != 0) || (t[ 3] != 0) || + // (t[ 4] != 0) || (t[ 5] != 0) || (t[ 6] != 0) || (t[ 7] != 0) || + // (t[ 8] != 0) || (t[ 9] != 0) || (t[10] != 0) || (t[11] != 0) || + // (t[12] != 0) || (t[13] != 0) || (t[14] != 0) || (t[15] != 0) || + // (t[16] != 0) || (t[17] != 0) || (t[18] != 0) || (t[19] != 0) || + // (t[20] != 0) || (t[21] != 0) || (t[22] != 0) || (t[23] != 0) || + // (t[24] != 0) || (t[25] != 0) || (t[26] != 0) || (t[27] != 0) || + // (t[28] != 0) || (t[29] != 0) || (t[30] != 0) || (t[31] != 0) || + // (t[32] != 0) || (t[33] != 0) || (t[34] != 0) || (t[35] != 0) || + // (t[36] != 0) || (t[37] != 0) || (t[38] != 0) || (t[39] != 0) || + // (t[40] != 0) || (t[41] != 0) || (t[42] != 0) || (t[43] != 0) || + // (t[44] != 0) || (t[45] != 0) || (t[46] != 0) || (t[47] != 0) || + // (t[48] != 0) || (t[49] != 0) || (t[50] != 0) || (t[51] != 0) || + // (t[52] != 0) || (t[53] != 0) || (t[54] != 0) || (t[55] != 0) || + // (t[56] != 0) || (t[57] != 0) || (t[58] != 0) || (t[59] != 0) || + // (t[60] != 0) || (t[61] != 0) || (t[62] != 0) || (t[63] != 0) || + // (t[64] != 0)) + // { + // // t >>= 1: + // + // 
t[ 0] = (t[ 1] << 31) | (t[ 0] >> 1); + // t[ 1] = (t[ 2] << 31) | (t[ 1] >> 1); + // t[ 2] = (t[ 3] << 31) | (t[ 2] >> 1); + // t[ 3] = (t[ 4] << 31) | (t[ 3] >> 1); + // t[ 4] = (t[ 5] << 31) | (t[ 4] >> 1); + // t[ 5] = (t[ 6] << 31) | (t[ 5] >> 1); + // t[ 6] = (t[ 7] << 31) | (t[ 6] >> 1); + // t[ 7] = (t[ 8] << 31) | (t[ 7] >> 1); + // t[ 8] = (t[ 9] << 31) | (t[ 8] >> 1); + // t[ 9] = (t[ 10] << 31) | (t[ 9] >> 1); + // t[ 10] = (t[ 11] << 31) | (t[ 10] >> 1); + // t[ 11] = (t[ 12] << 31) | (t[ 11] >> 1); + // t[ 12] = (t[ 13] << 31) | (t[ 12] >> 1); + // t[ 13] = (t[ 14] << 31) | (t[ 13] >> 1); + // t[ 14] = (t[ 15] << 31) | (t[ 14] >> 1); + // t[ 15] = (t[ 16] << 31) | (t[ 15] >> 1); + // t[ 16] = (t[ 17] << 31) | (t[ 16] >> 1); + // t[ 17] = (t[ 18] << 31) | (t[ 17] >> 1); + // t[ 18] = (t[ 19] << 31) | (t[ 18] >> 1); + // t[ 19] = (t[ 20] << 31) | (t[ 19] >> 1); + // t[ 20] = (t[ 21] << 31) | (t[ 20] >> 1); + // t[ 21] = (t[ 22] << 31) | (t[ 21] >> 1); + // t[ 22] = (t[ 23] << 31) | (t[ 22] >> 1); + // t[ 23] = (t[ 24] << 31) | (t[ 23] >> 1); + // t[ 24] = (t[ 25] << 31) | (t[ 24] >> 1); + // t[ 25] = (t[ 26] << 31) | (t[ 25] >> 1); + // t[ 26] = (t[ 27] << 31) | (t[ 26] >> 1); + // t[ 27] = (t[ 28] << 31) | (t[ 27] >> 1); + // t[ 28] = (t[ 29] << 31) | (t[ 28] >> 1); + // t[ 29] = (t[ 30] << 31) | (t[ 29] >> 1); + // t[ 30] = (t[ 31] << 31) | (t[ 30] >> 1); + // t[ 31] = (t[ 32] << 31) | (t[ 31] >> 1); + // t[ 32] = (t[ 33] << 31) | (t[ 32] >> 1); + // t[ 33] = (t[ 34] << 31) | (t[ 33] >> 1); + // t[ 34] = (t[ 35] << 31) | (t[ 34] >> 1); + // t[ 35] = (t[ 36] << 31) | (t[ 35] >> 1); + // t[ 36] = (t[ 37] << 31) | (t[ 36] >> 1); + // t[ 37] = (t[ 38] << 31) | (t[ 37] >> 1); + // t[ 38] = (t[ 39] << 31) | (t[ 38] >> 1); + // t[ 39] = (t[ 40] << 31) | (t[ 39] >> 1); + // t[ 40] = (t[ 41] << 31) | (t[ 40] >> 1); + // t[ 41] = (t[ 42] << 31) | (t[ 41] >> 1); + // t[ 42] = (t[ 43] << 31) | (t[ 42] >> 1); + // t[ 43] = (t[ 44] << 31) | (t[ 43] >> 1); + // t[ 
44] = (t[ 45] << 31) | (t[ 44] >> 1); + // t[ 45] = (t[ 46] << 31) | (t[ 45] >> 1); + // t[ 46] = (t[ 47] << 31) | (t[ 46] >> 1); + // t[ 47] = (t[ 48] << 31) | (t[ 47] >> 1); + // t[ 48] = (t[ 49] << 31) | (t[ 48] >> 1); + // t[ 49] = (t[ 50] << 31) | (t[ 49] >> 1); + // t[ 50] = (t[ 51] << 31) | (t[ 50] >> 1); + // t[ 51] = (t[ 52] << 31) | (t[ 51] >> 1); + // t[ 52] = (t[ 53] << 31) | (t[ 52] >> 1); + // t[ 53] = (t[ 54] << 31) | (t[ 53] >> 1); + // t[ 54] = (t[ 55] << 31) | (t[ 54] >> 1); + // t[ 55] = (t[ 56] << 31) | (t[ 55] >> 1); + // t[ 56] = (t[ 57] << 31) | (t[ 56] >> 1); + // t[ 57] = (t[ 58] << 31) | (t[ 57] >> 1); + // t[ 58] = (t[ 59] << 31) | (t[ 58] >> 1); + // t[ 59] = (t[ 60] << 31) | (t[ 59] >> 1); + // t[ 60] = (t[ 61] << 31) | (t[ 60] >> 1); + // t[ 61] = (t[ 62] << 31) | (t[ 61] >> 1); + // t[ 62] = (t[ 63] << 31) | (t[ 62] >> 1); + // t[ 63] = (t[ 64] << 31) | (t[ 63] >> 1); + // t[ 64] = (t[ 64] >> 1); + // + // ... + // } + + + // we use this optimization (we know "r" in advance, hard-coded/fixed) + + for (u32 i = 0; i < 2049; i++) + { + if ((u[0] & 1) == 0) + { + // u >>= 1: + + // for (u32 i = 0; i < 63; i++) u[i] = (u[i + 1] << 31) | (u[i] >> 1); + // u[63] >>= 1; + + u[ 0] = (u[ 1] << 31) | (u[ 0] >> 1); + u[ 1] = (u[ 2] << 31) | (u[ 1] >> 1); + u[ 2] = (u[ 3] << 31) | (u[ 2] >> 1); + u[ 3] = (u[ 4] << 31) | (u[ 3] >> 1); + u[ 4] = (u[ 5] << 31) | (u[ 4] >> 1); + u[ 5] = (u[ 6] << 31) | (u[ 5] >> 1); + u[ 6] = (u[ 7] << 31) | (u[ 6] >> 1); + u[ 7] = (u[ 8] << 31) | (u[ 7] >> 1); + u[ 8] = (u[ 9] << 31) | (u[ 8] >> 1); + u[ 9] = (u[ 10] << 31) | (u[ 9] >> 1); + u[ 10] = (u[ 11] << 31) | (u[ 10] >> 1); + u[ 11] = (u[ 12] << 31) | (u[ 11] >> 1); + u[ 12] = (u[ 13] << 31) | (u[ 12] >> 1); + u[ 13] = (u[ 14] << 31) | (u[ 13] >> 1); + u[ 14] = (u[ 15] << 31) | (u[ 14] >> 1); + u[ 15] = (u[ 16] << 31) | (u[ 15] >> 1); + u[ 16] = (u[ 17] << 31) | (u[ 16] >> 1); + u[ 17] = (u[ 18] << 31) | (u[ 17] >> 1); + u[ 18] = (u[ 19] << 31) | (u[ 18] >> 
1); + u[ 19] = (u[ 20] << 31) | (u[ 19] >> 1); + u[ 20] = (u[ 21] << 31) | (u[ 20] >> 1); + u[ 21] = (u[ 22] << 31) | (u[ 21] >> 1); + u[ 22] = (u[ 23] << 31) | (u[ 22] >> 1); + u[ 23] = (u[ 24] << 31) | (u[ 23] >> 1); + u[ 24] = (u[ 25] << 31) | (u[ 24] >> 1); + u[ 25] = (u[ 26] << 31) | (u[ 25] >> 1); + u[ 26] = (u[ 27] << 31) | (u[ 26] >> 1); + u[ 27] = (u[ 28] << 31) | (u[ 27] >> 1); + u[ 28] = (u[ 29] << 31) | (u[ 28] >> 1); + u[ 29] = (u[ 30] << 31) | (u[ 29] >> 1); + u[ 30] = (u[ 31] << 31) | (u[ 30] >> 1); + u[ 31] = (u[ 32] << 31) | (u[ 31] >> 1); + u[ 32] = (u[ 33] << 31) | (u[ 32] >> 1); + u[ 33] = (u[ 34] << 31) | (u[ 33] >> 1); + u[ 34] = (u[ 35] << 31) | (u[ 34] >> 1); + u[ 35] = (u[ 36] << 31) | (u[ 35] >> 1); + u[ 36] = (u[ 37] << 31) | (u[ 36] >> 1); + u[ 37] = (u[ 38] << 31) | (u[ 37] >> 1); + u[ 38] = (u[ 39] << 31) | (u[ 38] >> 1); + u[ 39] = (u[ 40] << 31) | (u[ 39] >> 1); + u[ 40] = (u[ 41] << 31) | (u[ 40] >> 1); + u[ 41] = (u[ 42] << 31) | (u[ 41] >> 1); + u[ 42] = (u[ 43] << 31) | (u[ 42] >> 1); + u[ 43] = (u[ 44] << 31) | (u[ 43] >> 1); + u[ 44] = (u[ 45] << 31) | (u[ 44] >> 1); + u[ 45] = (u[ 46] << 31) | (u[ 45] >> 1); + u[ 46] = (u[ 47] << 31) | (u[ 46] >> 1); + u[ 47] = (u[ 48] << 31) | (u[ 47] >> 1); + u[ 48] = (u[ 49] << 31) | (u[ 48] >> 1); + u[ 49] = (u[ 50] << 31) | (u[ 49] >> 1); + u[ 50] = (u[ 51] << 31) | (u[ 50] >> 1); + u[ 51] = (u[ 52] << 31) | (u[ 51] >> 1); + u[ 52] = (u[ 53] << 31) | (u[ 52] >> 1); + u[ 53] = (u[ 54] << 31) | (u[ 53] >> 1); + u[ 54] = (u[ 55] << 31) | (u[ 54] >> 1); + u[ 55] = (u[ 56] << 31) | (u[ 55] >> 1); + u[ 56] = (u[ 57] << 31) | (u[ 56] >> 1); + u[ 57] = (u[ 58] << 31) | (u[ 57] >> 1); + u[ 58] = (u[ 59] << 31) | (u[ 58] >> 1); + u[ 59] = (u[ 60] << 31) | (u[ 59] >> 1); + u[ 60] = (u[ 61] << 31) | (u[ 60] >> 1); + u[ 61] = (u[ 62] << 31) | (u[ 61] >> 1); + u[ 62] = (u[ 63] << 31) | (u[ 62] >> 1); + u[ 63] = (u[ 63] >> 1); + + // v >>= 1 + + // for (u32 i = 0; i < 64; i++) v[i] = (v[i + 1] << 31) | 
(v[i] >> 1); + // v[64] >>= 1; + + v[ 0] = (v[ 1] << 31) | (v[ 0] >> 1); + v[ 1] = (v[ 2] << 31) | (v[ 1] >> 1); + v[ 2] = (v[ 3] << 31) | (v[ 2] >> 1); + v[ 3] = (v[ 4] << 31) | (v[ 3] >> 1); + v[ 4] = (v[ 5] << 31) | (v[ 4] >> 1); + v[ 5] = (v[ 6] << 31) | (v[ 5] >> 1); + v[ 6] = (v[ 7] << 31) | (v[ 6] >> 1); + v[ 7] = (v[ 8] << 31) | (v[ 7] >> 1); + v[ 8] = (v[ 9] << 31) | (v[ 8] >> 1); + v[ 9] = (v[ 10] << 31) | (v[ 9] >> 1); + v[ 10] = (v[ 11] << 31) | (v[ 10] >> 1); + v[ 11] = (v[ 12] << 31) | (v[ 11] >> 1); + v[ 12] = (v[ 13] << 31) | (v[ 12] >> 1); + v[ 13] = (v[ 14] << 31) | (v[ 13] >> 1); + v[ 14] = (v[ 15] << 31) | (v[ 14] >> 1); + v[ 15] = (v[ 16] << 31) | (v[ 15] >> 1); + v[ 16] = (v[ 17] << 31) | (v[ 16] >> 1); + v[ 17] = (v[ 18] << 31) | (v[ 17] >> 1); + v[ 18] = (v[ 19] << 31) | (v[ 18] >> 1); + v[ 19] = (v[ 20] << 31) | (v[ 19] >> 1); + v[ 20] = (v[ 21] << 31) | (v[ 20] >> 1); + v[ 21] = (v[ 22] << 31) | (v[ 21] >> 1); + v[ 22] = (v[ 23] << 31) | (v[ 22] >> 1); + v[ 23] = (v[ 24] << 31) | (v[ 23] >> 1); + v[ 24] = (v[ 25] << 31) | (v[ 24] >> 1); + v[ 25] = (v[ 26] << 31) | (v[ 25] >> 1); + v[ 26] = (v[ 27] << 31) | (v[ 26] >> 1); + v[ 27] = (v[ 28] << 31) | (v[ 27] >> 1); + v[ 28] = (v[ 29] << 31) | (v[ 28] >> 1); + v[ 29] = (v[ 30] << 31) | (v[ 29] >> 1); + v[ 30] = (v[ 31] << 31) | (v[ 30] >> 1); + v[ 31] = (v[ 32] << 31) | (v[ 31] >> 1); + v[ 32] = (v[ 33] << 31) | (v[ 32] >> 1); + v[ 33] = (v[ 34] << 31) | (v[ 33] >> 1); + v[ 34] = (v[ 35] << 31) | (v[ 34] >> 1); + v[ 35] = (v[ 36] << 31) | (v[ 35] >> 1); + v[ 36] = (v[ 37] << 31) | (v[ 36] >> 1); + v[ 37] = (v[ 38] << 31) | (v[ 37] >> 1); + v[ 38] = (v[ 39] << 31) | (v[ 38] >> 1); + v[ 39] = (v[ 40] << 31) | (v[ 39] >> 1); + v[ 40] = (v[ 41] << 31) | (v[ 40] >> 1); + v[ 41] = (v[ 42] << 31) | (v[ 41] >> 1); + v[ 42] = (v[ 43] << 31) | (v[ 42] >> 1); + v[ 43] = (v[ 44] << 31) | (v[ 43] >> 1); + v[ 44] = (v[ 45] << 31) | (v[ 44] >> 1); + v[ 45] = (v[ 46] << 31) | (v[ 45] >> 1); + v[ 46] = (v[ 
47] << 31) | (v[ 46] >> 1); + v[ 47] = (v[ 48] << 31) | (v[ 47] >> 1); + v[ 48] = (v[ 49] << 31) | (v[ 48] >> 1); + v[ 49] = (v[ 50] << 31) | (v[ 49] >> 1); + v[ 50] = (v[ 51] << 31) | (v[ 50] >> 1); + v[ 51] = (v[ 52] << 31) | (v[ 51] >> 1); + v[ 52] = (v[ 53] << 31) | (v[ 52] >> 1); + v[ 53] = (v[ 54] << 31) | (v[ 53] >> 1); + v[ 54] = (v[ 55] << 31) | (v[ 54] >> 1); + v[ 55] = (v[ 56] << 31) | (v[ 55] >> 1); + v[ 56] = (v[ 57] << 31) | (v[ 56] >> 1); + v[ 57] = (v[ 58] << 31) | (v[ 57] >> 1); + v[ 58] = (v[ 59] << 31) | (v[ 58] >> 1); + v[ 59] = (v[ 60] << 31) | (v[ 59] >> 1); + v[ 60] = (v[ 61] << 31) | (v[ 60] >> 1); + v[ 61] = (v[ 62] << 31) | (v[ 61] >> 1); + v[ 62] = (v[ 63] << 31) | (v[ 62] >> 1); + v[ 63] = (v[ 64] << 31) | (v[ 63] >> 1); + v[ 64] = (v[ 64] >> 1); + } + else + { + // s = u ^ m + + u32 s[64]; + + // for (u32 i = 0; i < 64; i++) s[i] = u[i] ^ m[i]; + + s[ 0] = u[ 0] ^ m[ 0]; s[ 1] = u[ 1] ^ m[ 1]; + s[ 2] = u[ 2] ^ m[ 2]; s[ 3] = u[ 3] ^ m[ 3]; + s[ 4] = u[ 4] ^ m[ 4]; s[ 5] = u[ 5] ^ m[ 5]; + s[ 6] = u[ 6] ^ m[ 6]; s[ 7] = u[ 7] ^ m[ 7]; + s[ 8] = u[ 8] ^ m[ 8]; s[ 9] = u[ 9] ^ m[ 9]; + s[10] = u[10] ^ m[10]; s[11] = u[11] ^ m[11]; + s[12] = u[12] ^ m[12]; s[13] = u[13] ^ m[13]; + s[14] = u[14] ^ m[14]; s[15] = u[15] ^ m[15]; + s[16] = u[16] ^ m[16]; s[17] = u[17] ^ m[17]; + s[18] = u[18] ^ m[18]; s[19] = u[19] ^ m[19]; + s[20] = u[20] ^ m[20]; s[21] = u[21] ^ m[21]; + s[22] = u[22] ^ m[22]; s[23] = u[23] ^ m[23]; + s[24] = u[24] ^ m[24]; s[25] = u[25] ^ m[25]; + s[26] = u[26] ^ m[26]; s[27] = u[27] ^ m[27]; + s[28] = u[28] ^ m[28]; s[29] = u[29] ^ m[29]; + s[30] = u[30] ^ m[30]; s[31] = u[31] ^ m[31]; + s[32] = u[32] ^ m[32]; s[33] = u[33] ^ m[33]; + s[34] = u[34] ^ m[34]; s[35] = u[35] ^ m[35]; + s[36] = u[36] ^ m[36]; s[37] = u[37] ^ m[37]; + s[38] = u[38] ^ m[38]; s[39] = u[39] ^ m[39]; + s[40] = u[40] ^ m[40]; s[41] = u[41] ^ m[41]; + s[42] = u[42] ^ m[42]; s[43] = u[43] ^ m[43]; + s[44] = u[44] ^ m[44]; s[45] = u[45] ^ m[45]; + s[46] 
= u[46] ^ m[46]; s[47] = u[47] ^ m[47]; + s[48] = u[48] ^ m[48]; s[49] = u[49] ^ m[49]; + s[50] = u[50] ^ m[50]; s[51] = u[51] ^ m[51]; + s[52] = u[52] ^ m[52]; s[53] = u[53] ^ m[53]; + s[54] = u[54] ^ m[54]; s[55] = u[55] ^ m[55]; + s[56] = u[56] ^ m[56]; s[57] = u[57] ^ m[57]; + s[58] = u[58] ^ m[58]; s[59] = u[59] ^ m[59]; + s[60] = u[60] ^ m[60]; s[61] = u[61] ^ m[61]; + s[62] = u[62] ^ m[62]; s[63] = u[63] ^ m[63]; + + // s >>= 1 + + // for (u32 i = 0; i < 63; i++) s[i] = (s[i + 1] << 31) | (s[i] >> 1); + // s[63] >>= 1; + + s[ 0] = (s[ 1] << 31) | (s[ 0] >> 1); + s[ 1] = (s[ 2] << 31) | (s[ 1] >> 1); + s[ 2] = (s[ 3] << 31) | (s[ 2] >> 1); + s[ 3] = (s[ 4] << 31) | (s[ 3] >> 1); + s[ 4] = (s[ 5] << 31) | (s[ 4] >> 1); + s[ 5] = (s[ 6] << 31) | (s[ 5] >> 1); + s[ 6] = (s[ 7] << 31) | (s[ 6] >> 1); + s[ 7] = (s[ 8] << 31) | (s[ 7] >> 1); + s[ 8] = (s[ 9] << 31) | (s[ 8] >> 1); + s[ 9] = (s[ 10] << 31) | (s[ 9] >> 1); + s[ 10] = (s[ 11] << 31) | (s[ 10] >> 1); + s[ 11] = (s[ 12] << 31) | (s[ 11] >> 1); + s[ 12] = (s[ 13] << 31) | (s[ 12] >> 1); + s[ 13] = (s[ 14] << 31) | (s[ 13] >> 1); + s[ 14] = (s[ 15] << 31) | (s[ 14] >> 1); + s[ 15] = (s[ 16] << 31) | (s[ 15] >> 1); + s[ 16] = (s[ 17] << 31) | (s[ 16] >> 1); + s[ 17] = (s[ 18] << 31) | (s[ 17] >> 1); + s[ 18] = (s[ 19] << 31) | (s[ 18] >> 1); + s[ 19] = (s[ 20] << 31) | (s[ 19] >> 1); + s[ 20] = (s[ 21] << 31) | (s[ 20] >> 1); + s[ 21] = (s[ 22] << 31) | (s[ 21] >> 1); + s[ 22] = (s[ 23] << 31) | (s[ 22] >> 1); + s[ 23] = (s[ 24] << 31) | (s[ 23] >> 1); + s[ 24] = (s[ 25] << 31) | (s[ 24] >> 1); + s[ 25] = (s[ 26] << 31) | (s[ 25] >> 1); + s[ 26] = (s[ 27] << 31) | (s[ 26] >> 1); + s[ 27] = (s[ 28] << 31) | (s[ 27] >> 1); + s[ 28] = (s[ 29] << 31) | (s[ 28] >> 1); + s[ 29] = (s[ 30] << 31) | (s[ 29] >> 1); + s[ 30] = (s[ 31] << 31) | (s[ 30] >> 1); + s[ 31] = (s[ 32] << 31) | (s[ 31] >> 1); + s[ 32] = (s[ 33] << 31) | (s[ 32] >> 1); + s[ 33] = (s[ 34] << 31) | (s[ 33] >> 1); + s[ 34] = (s[ 35] << 31) | (s[ 
34] >> 1); + s[ 35] = (s[ 36] << 31) | (s[ 35] >> 1); + s[ 36] = (s[ 37] << 31) | (s[ 36] >> 1); + s[ 37] = (s[ 38] << 31) | (s[ 37] >> 1); + s[ 38] = (s[ 39] << 31) | (s[ 38] >> 1); + s[ 39] = (s[ 40] << 31) | (s[ 39] >> 1); + s[ 40] = (s[ 41] << 31) | (s[ 40] >> 1); + s[ 41] = (s[ 42] << 31) | (s[ 41] >> 1); + s[ 42] = (s[ 43] << 31) | (s[ 42] >> 1); + s[ 43] = (s[ 44] << 31) | (s[ 43] >> 1); + s[ 44] = (s[ 45] << 31) | (s[ 44] >> 1); + s[ 45] = (s[ 46] << 31) | (s[ 45] >> 1); + s[ 46] = (s[ 47] << 31) | (s[ 46] >> 1); + s[ 47] = (s[ 48] << 31) | (s[ 47] >> 1); + s[ 48] = (s[ 49] << 31) | (s[ 48] >> 1); + s[ 49] = (s[ 50] << 31) | (s[ 49] >> 1); + s[ 50] = (s[ 51] << 31) | (s[ 50] >> 1); + s[ 51] = (s[ 52] << 31) | (s[ 51] >> 1); + s[ 52] = (s[ 53] << 31) | (s[ 52] >> 1); + s[ 53] = (s[ 54] << 31) | (s[ 53] >> 1); + s[ 54] = (s[ 55] << 31) | (s[ 54] >> 1); + s[ 55] = (s[ 56] << 31) | (s[ 55] >> 1); + s[ 56] = (s[ 57] << 31) | (s[ 56] >> 1); + s[ 57] = (s[ 58] << 31) | (s[ 57] >> 1); + s[ 58] = (s[ 59] << 31) | (s[ 58] >> 1); + s[ 59] = (s[ 60] << 31) | (s[ 59] >> 1); + s[ 60] = (s[ 61] << 31) | (s[ 60] >> 1); + s[ 61] = (s[ 62] << 31) | (s[ 61] >> 1); + s[ 62] = (s[ 63] << 31) | (s[ 62] >> 1); + s[ 63] = (s[ 63] >> 1); + + // u &= m + + // for (u32 i = 0; i < 64; i++) u[i] &= m[i]; + + u[ 0] &= m[ 0]; u[ 1] &= m[ 1]; u[ 2] &= m[ 2]; u[ 3] &= m[ 3]; + u[ 4] &= m[ 4]; u[ 5] &= m[ 5]; u[ 6] &= m[ 6]; u[ 7] &= m[ 7]; + u[ 8] &= m[ 8]; u[ 9] &= m[ 9]; u[10] &= m[10]; u[11] &= m[11]; + u[12] &= m[12]; u[13] &= m[13]; u[14] &= m[14]; u[15] &= m[15]; + u[16] &= m[16]; u[17] &= m[17]; u[18] &= m[18]; u[19] &= m[19]; + u[20] &= m[20]; u[21] &= m[21]; u[22] &= m[22]; u[23] &= m[23]; + u[24] &= m[24]; u[25] &= m[25]; u[26] &= m[26]; u[27] &= m[27]; + u[28] &= m[28]; u[29] &= m[29]; u[30] &= m[30]; u[31] &= m[31]; + u[32] &= m[32]; u[33] &= m[33]; u[34] &= m[34]; u[35] &= m[35]; + u[36] &= m[36]; u[37] &= m[37]; u[38] &= m[38]; u[39] &= m[39]; + u[40] &= m[40]; u[41] &= 
m[41]; u[42] &= m[42]; u[43] &= m[43]; + u[44] &= m[44]; u[45] &= m[45]; u[46] &= m[46]; u[47] &= m[47]; + u[48] &= m[48]; u[49] &= m[49]; u[50] &= m[50]; u[51] &= m[51]; + u[52] &= m[52]; u[53] &= m[53]; u[54] &= m[54]; u[55] &= m[55]; + u[56] &= m[56]; u[57] &= m[57]; u[58] &= m[58]; u[59] &= m[59]; + u[60] &= m[60]; u[61] &= m[61]; u[62] &= m[62]; u[63] &= m[63]; + + // t = u (to have the original u) + + u32 t[64]; // 65 would be needed for "r" (but we know "r" in advance) + + // for (u32 i = 0; i < 64; i++) t[i] = u[i]; + + t[ 0] = u[ 0]; t[ 1] = u[ 1]; t[ 2] = u[ 2]; t[ 3] = u[ 3]; + t[ 4] = u[ 4]; t[ 5] = u[ 5]; t[ 6] = u[ 6]; t[ 7] = u[ 7]; + t[ 8] = u[ 8]; t[ 9] = u[ 9]; t[10] = u[10]; t[11] = u[11]; + t[12] = u[12]; t[13] = u[13]; t[14] = u[14]; t[15] = u[15]; + t[16] = u[16]; t[17] = u[17]; t[18] = u[18]; t[19] = u[19]; + t[20] = u[20]; t[21] = u[21]; t[22] = u[22]; t[23] = u[23]; + t[24] = u[24]; t[25] = u[25]; t[26] = u[26]; t[27] = u[27]; + t[28] = u[28]; t[29] = u[29]; t[30] = u[30]; t[31] = u[31]; + t[32] = u[32]; t[33] = u[33]; t[34] = u[34]; t[35] = u[35]; + t[36] = u[36]; t[37] = u[37]; t[38] = u[38]; t[39] = u[39]; + t[40] = u[40]; t[41] = u[41]; t[42] = u[42]; t[43] = u[43]; + t[44] = u[44]; t[45] = u[45]; t[46] = u[46]; t[47] = u[47]; + t[48] = u[48]; t[49] = u[49]; t[50] = u[50]; t[51] = u[51]; + t[52] = u[52]; t[53] = u[53]; t[54] = u[54]; t[55] = u[55]; + t[56] = u[56]; t[57] = u[57]; t[58] = u[58]; t[59] = u[59]; + t[60] = u[60]; t[61] = u[61]; t[62] = u[62]; t[63] = u[63]; + + // u += s + + // for (u32 i = 0; i < 64; i++) u[i] += s[i]; + + u[ 0] += s[ 0]; u[ 1] += s[ 1]; u[ 2] += s[ 2]; u[ 3] += s[ 3]; + u[ 4] += s[ 4]; u[ 5] += s[ 5]; u[ 6] += s[ 6]; u[ 7] += s[ 7]; + u[ 8] += s[ 8]; u[ 9] += s[ 9]; u[10] += s[10]; u[11] += s[11]; + u[12] += s[12]; u[13] += s[13]; u[14] += s[14]; u[15] += s[15]; + u[16] += s[16]; u[17] += s[17]; u[18] += s[18]; u[19] += s[19]; + u[20] += s[20]; u[21] += s[21]; u[22] += s[22]; u[23] += s[23]; + u[24] += 
s[24]; u[25] += s[25]; u[26] += s[26]; u[27] += s[27]; + u[28] += s[28]; u[29] += s[29]; u[30] += s[30]; u[31] += s[31]; + u[32] += s[32]; u[33] += s[33]; u[34] += s[34]; u[35] += s[35]; + u[36] += s[36]; u[37] += s[37]; u[38] += s[38]; u[39] += s[39]; + u[40] += s[40]; u[41] += s[41]; u[42] += s[42]; u[43] += s[43]; + u[44] += s[44]; u[45] += s[45]; u[46] += s[46]; u[47] += s[47]; + u[48] += s[48]; u[49] += s[49]; u[50] += s[50]; u[51] += s[51]; + u[52] += s[52]; u[53] += s[53]; u[54] += s[54]; u[55] += s[55]; + u[56] += s[56]; u[57] += s[57]; u[58] += s[58]; u[59] += s[59]; + u[60] += s[60]; u[61] += s[61]; u[62] += s[62]; u[63] += s[63]; + + // take care of the carry for the addition above (i.e check for "overflows") + + // for (u32 i = 0; i < 63; i++) if (u[i] < t[i]) u[i + 1]++; + + // if (u[ 0] < t[ 0]) u[ 1]++; + // if (u[ 1] < t[ 1]) u[ 2]++; + // if (u[ 2] < t[ 2]) u[ 3]++; + // if (u[ 3] < t[ 3]) u[ 4]++; + // if (u[ 4] < t[ 4]) u[ 5]++; + // if (u[ 5] < t[ 5]) u[ 6]++; + // if (u[ 6] < t[ 6]) u[ 7]++; + // if (u[ 7] < t[ 7]) u[ 8]++; + // if (u[ 8] < t[ 8]) u[ 9]++; + // if (u[ 9] < t[ 9]) u[10]++; + // if (u[10] < t[10]) u[11]++; + // if (u[11] < t[11]) u[12]++; + // if (u[12] < t[12]) u[13]++; + // if (u[13] < t[13]) u[14]++; + // if (u[14] < t[14]) u[15]++; + // if (u[15] < t[15]) u[16]++; + // if (u[16] < t[16]) u[17]++; + // if (u[17] < t[17]) u[18]++; + // if (u[18] < t[18]) u[19]++; + // if (u[19] < t[19]) u[20]++; + // if (u[20] < t[20]) u[21]++; + // if (u[21] < t[21]) u[22]++; + // if (u[22] < t[22]) u[23]++; + // if (u[23] < t[23]) u[24]++; + // if (u[24] < t[24]) u[25]++; + // if (u[25] < t[25]) u[26]++; + // if (u[26] < t[26]) u[27]++; + // if (u[27] < t[27]) u[28]++; + // if (u[28] < t[28]) u[29]++; + // if (u[29] < t[29]) u[30]++; + // if (u[30] < t[30]) u[31]++; + // if (u[31] < t[31]) u[32]++; + // if (u[32] < t[32]) u[33]++; + // if (u[33] < t[33]) u[34]++; + // if (u[34] < t[34]) u[35]++; + // if (u[35] < t[35]) u[36]++; + // if (u[36] 
< t[36]) u[37]++; + // if (u[37] < t[37]) u[38]++; + // if (u[38] < t[38]) u[39]++; + // if (u[39] < t[39]) u[40]++; + // if (u[40] < t[40]) u[41]++; + // if (u[41] < t[41]) u[42]++; + // if (u[42] < t[42]) u[43]++; + // if (u[43] < t[43]) u[44]++; + // if (u[44] < t[44]) u[45]++; + // if (u[45] < t[45]) u[46]++; + // if (u[46] < t[46]) u[47]++; + // if (u[47] < t[47]) u[48]++; + // if (u[48] < t[48]) u[49]++; + // if (u[49] < t[49]) u[50]++; + // if (u[50] < t[50]) u[51]++; + // if (u[51] < t[51]) u[52]++; + // if (u[52] < t[52]) u[53]++; + // if (u[53] < t[53]) u[54]++; + // if (u[54] < t[54]) u[55]++; + // if (u[55] < t[55]) u[56]++; + // if (u[56] < t[56]) u[57]++; + // if (u[57] < t[57]) u[58]++; + // if (u[58] < t[58]) u[59]++; + // if (u[59] < t[59]) u[60]++; + // if (u[60] < t[60]) u[61]++; + // if (u[61] < t[61]) u[62]++; + // if (u[62] < t[62]) u[63]++; + + // for (u32 i = 1; i < 64; i++) u[i] += u[i - 1] < t[i - 1]; + + u[ 1] += u[ 0] < t[ 0]; + u[ 2] += u[ 1] < t[ 1]; + u[ 3] += u[ 2] < t[ 2]; + u[ 4] += u[ 3] < t[ 3]; + u[ 5] += u[ 4] < t[ 4]; + u[ 6] += u[ 5] < t[ 5]; + u[ 7] += u[ 6] < t[ 6]; + u[ 8] += u[ 7] < t[ 7]; + u[ 9] += u[ 8] < t[ 8]; + u[10] += u[ 9] < t[ 9]; + u[11] += u[10] < t[10]; + u[12] += u[11] < t[11]; + u[13] += u[12] < t[12]; + u[14] += u[13] < t[13]; + u[15] += u[14] < t[14]; + u[16] += u[15] < t[15]; + u[17] += u[16] < t[16]; + u[18] += u[17] < t[17]; + u[19] += u[18] < t[18]; + u[20] += u[19] < t[19]; + u[21] += u[20] < t[20]; + u[22] += u[21] < t[21]; + u[23] += u[22] < t[22]; + u[24] += u[23] < t[23]; + u[25] += u[24] < t[24]; + u[26] += u[25] < t[25]; + u[27] += u[26] < t[26]; + u[28] += u[27] < t[27]; + u[29] += u[28] < t[28]; + u[30] += u[29] < t[29]; + u[31] += u[30] < t[30]; + u[32] += u[31] < t[31]; + u[33] += u[32] < t[32]; + u[34] += u[33] < t[33]; + u[35] += u[34] < t[34]; + u[36] += u[35] < t[35]; + u[37] += u[36] < t[36]; + u[38] += u[37] < t[37]; + u[39] += u[38] < t[38]; + u[40] += u[39] < t[39]; + u[41] += u[40] 
< t[40]; + u[42] += u[41] < t[41]; + u[43] += u[42] < t[42]; + u[44] += u[43] < t[43]; + u[45] += u[44] < t[44]; + u[46] += u[45] < t[45]; + u[47] += u[46] < t[46]; + u[48] += u[47] < t[47]; + u[49] += u[48] < t[48]; + u[50] += u[49] < t[49]; + u[51] += u[50] < t[50]; + u[52] += u[51] < t[51]; + u[53] += u[52] < t[52]; + u[54] += u[53] < t[53]; + u[55] += u[54] < t[54]; + u[56] += u[55] < t[55]; + u[57] += u[56] < t[56]; + u[58] += u[57] < t[57]; + u[59] += u[58] < t[58]; + u[60] += u[59] < t[59]; + u[61] += u[60] < t[60]; + u[62] += u[61] < t[61]; + u[63] += u[62] < t[62]; + + // v >>= 1 + + // for (u32 i = 0; i < 64; i++) v[i] = (v[i + 1] << 31) | (v[i] >> 1); + // v[64] >>= 1; + + v[ 0] = (v[ 1] << 31) | (v[ 0] >> 1); + v[ 1] = (v[ 2] << 31) | (v[ 1] >> 1); + v[ 2] = (v[ 3] << 31) | (v[ 2] >> 1); + v[ 3] = (v[ 4] << 31) | (v[ 3] >> 1); + v[ 4] = (v[ 5] << 31) | (v[ 4] >> 1); + v[ 5] = (v[ 6] << 31) | (v[ 5] >> 1); + v[ 6] = (v[ 7] << 31) | (v[ 6] >> 1); + v[ 7] = (v[ 8] << 31) | (v[ 7] >> 1); + v[ 8] = (v[ 9] << 31) | (v[ 8] >> 1); + v[ 9] = (v[ 10] << 31) | (v[ 9] >> 1); + v[ 10] = (v[ 11] << 31) | (v[ 10] >> 1); + v[ 11] = (v[ 12] << 31) | (v[ 11] >> 1); + v[ 12] = (v[ 13] << 31) | (v[ 12] >> 1); + v[ 13] = (v[ 14] << 31) | (v[ 13] >> 1); + v[ 14] = (v[ 15] << 31) | (v[ 14] >> 1); + v[ 15] = (v[ 16] << 31) | (v[ 15] >> 1); + v[ 16] = (v[ 17] << 31) | (v[ 16] >> 1); + v[ 17] = (v[ 18] << 31) | (v[ 17] >> 1); + v[ 18] = (v[ 19] << 31) | (v[ 18] >> 1); + v[ 19] = (v[ 20] << 31) | (v[ 19] >> 1); + v[ 20] = (v[ 21] << 31) | (v[ 20] >> 1); + v[ 21] = (v[ 22] << 31) | (v[ 21] >> 1); + v[ 22] = (v[ 23] << 31) | (v[ 22] >> 1); + v[ 23] = (v[ 24] << 31) | (v[ 23] >> 1); + v[ 24] = (v[ 25] << 31) | (v[ 24] >> 1); + v[ 25] = (v[ 26] << 31) | (v[ 25] >> 1); + v[ 26] = (v[ 27] << 31) | (v[ 26] >> 1); + v[ 27] = (v[ 28] << 31) | (v[ 27] >> 1); + v[ 28] = (v[ 29] << 31) | (v[ 28] >> 1); + v[ 29] = (v[ 30] << 31) | (v[ 29] >> 1); + v[ 30] = (v[ 31] << 31) | (v[ 30] >> 1); + v[ 
31] = (v[ 32] << 31) | (v[ 31] >> 1); + v[ 32] = (v[ 33] << 31) | (v[ 32] >> 1); + v[ 33] = (v[ 34] << 31) | (v[ 33] >> 1); + v[ 34] = (v[ 35] << 31) | (v[ 34] >> 1); + v[ 35] = (v[ 36] << 31) | (v[ 35] >> 1); + v[ 36] = (v[ 37] << 31) | (v[ 36] >> 1); + v[ 37] = (v[ 38] << 31) | (v[ 37] >> 1); + v[ 38] = (v[ 39] << 31) | (v[ 38] >> 1); + v[ 39] = (v[ 40] << 31) | (v[ 39] >> 1); + v[ 40] = (v[ 41] << 31) | (v[ 40] >> 1); + v[ 41] = (v[ 42] << 31) | (v[ 41] >> 1); + v[ 42] = (v[ 43] << 31) | (v[ 42] >> 1); + v[ 43] = (v[ 44] << 31) | (v[ 43] >> 1); + v[ 44] = (v[ 45] << 31) | (v[ 44] >> 1); + v[ 45] = (v[ 46] << 31) | (v[ 45] >> 1); + v[ 46] = (v[ 47] << 31) | (v[ 46] >> 1); + v[ 47] = (v[ 48] << 31) | (v[ 47] >> 1); + v[ 48] = (v[ 49] << 31) | (v[ 48] >> 1); + v[ 49] = (v[ 50] << 31) | (v[ 49] >> 1); + v[ 50] = (v[ 51] << 31) | (v[ 50] >> 1); + v[ 51] = (v[ 52] << 31) | (v[ 51] >> 1); + v[ 52] = (v[ 53] << 31) | (v[ 52] >> 1); + v[ 53] = (v[ 54] << 31) | (v[ 53] >> 1); + v[ 54] = (v[ 55] << 31) | (v[ 54] >> 1); + v[ 55] = (v[ 56] << 31) | (v[ 55] >> 1); + v[ 56] = (v[ 57] << 31) | (v[ 56] >> 1); + v[ 57] = (v[ 58] << 31) | (v[ 57] >> 1); + v[ 58] = (v[ 59] << 31) | (v[ 58] >> 1); + v[ 59] = (v[ 60] << 31) | (v[ 59] >> 1); + v[ 60] = (v[ 61] << 31) | (v[ 60] >> 1); + v[ 61] = (v[ 62] << 31) | (v[ 61] >> 1); + v[ 62] = (v[ 63] << 31) | (v[ 62] >> 1); + v[ 63] = (v[ 64] << 31) | (v[ 63] >> 1); + v[ 64] = (v[ 64] >> 1); + + // t = v + // + // t[ 0] = v[ 0]; t[ 1] = v[ 1]; t[ 2] = v[ 2]; t[ 3] = v[ 3]; + // t[ 4] = v[ 4]; t[ 5] = v[ 5]; t[ 6] = v[ 6]; t[ 7] = v[ 7]; + // t[ 8] = v[ 8]; t[ 9] = v[ 9]; t[10] = v[10]; t[11] = v[11]; + // t[12] = v[12]; t[13] = v[13]; t[14] = v[14]; t[15] = v[15]; + // t[16] = v[16]; t[17] = v[17]; t[18] = v[18]; t[19] = v[19]; + // t[20] = v[20]; t[21] = v[21]; t[22] = v[22]; t[23] = v[23]; + // t[24] = v[24]; t[25] = v[25]; t[26] = v[26]; t[27] = v[27]; + // t[28] = v[28]; t[29] = v[29]; t[30] = v[30]; t[31] = v[31]; + // t[32] = v[32]; 
t[33] = v[33]; t[34] = v[34]; t[35] = v[35]; + // t[36] = v[36]; t[37] = v[37]; t[38] = v[38]; t[39] = v[39]; + // t[40] = v[40]; t[41] = v[41]; t[42] = v[42]; t[43] = v[43]; + // t[44] = v[44]; t[45] = v[45]; t[46] = v[46]; t[47] = v[47]; + // t[48] = v[48]; t[49] = v[49]; t[50] = v[50]; t[51] = v[51]; + // t[52] = v[52]; t[53] = v[53]; t[54] = v[54]; t[55] = v[55]; + // t[56] = v[56]; t[57] = v[57]; t[58] = v[58]; t[59] = v[59]; + // t[60] = v[60]; t[61] = v[61]; t[62] = v[62]; t[63] = v[63]; + // t[60] = v[60]; t[61] = v[61]; t[62] = v[62]; t[63] = v[63]; + // t[64] = v[64]; + + + // v += r (only r[64] has the value "1") + + // we can skip the sum with r[x], since most are set to 0 + + // v[ 0] += r[ 0]; v[ 1] += r[ 1]; v[ 2] += r[ 2]; v[ 3] += r[ 3]; + // v[ 4] += r[ 4]; v[ 5] += r[ 5]; v[ 6] += r[ 6]; v[ 7] += r[ 7]; + // v[ 8] += r[ 8]; v[ 9] += r[ 9]; v[10] += r[10]; v[11] += r[11]; + // v[12] += r[12]; v[13] += r[13]; v[14] += r[14]; v[15] += r[15]; + // v[16] += r[16]; v[17] += r[17]; v[18] += r[18]; v[19] += r[19]; + // v[20] += r[20]; v[21] += r[21]; v[22] += r[22]; v[23] += r[23]; + // v[24] += r[24]; v[25] += r[25]; v[26] += r[26]; v[27] += r[27]; + // v[28] += r[28]; v[29] += r[29]; v[30] += r[30]; v[31] += r[31]; + // v[32] += r[32]; v[33] += r[33]; v[34] += r[34]; v[35] += r[35]; + // v[36] += r[36]; v[37] += r[37]; v[38] += r[38]; v[39] += r[39]; + // v[40] += r[40]; v[41] += r[41]; v[42] += r[42]; v[43] += r[43]; + // v[44] += r[44]; v[45] += r[45]; v[46] += r[46]; v[47] += r[47]; + // v[48] += r[48]; v[49] += r[49]; v[50] += r[50]; v[51] += r[51]; + // v[52] += r[52]; v[53] += r[53]; v[54] += r[54]; v[55] += r[55]; + // v[56] += r[56]; v[57] += r[57]; v[58] += r[58]; v[59] += r[59]; + // v[60] += r[60]; v[61] += r[61]; v[62] += r[62]; v[63] += r[63]; + // v[64] += r[64]; + + v[64] += 1; // hard-coded "r" value (r[64]) + + // take care of the carry for the addition above (i.e check for "overflows") + + // for (u32 i = 1; i < 65; i++) v[i] += v[i - 
1] < t[i - 1]; + + // v[ 1] += v[ 0] < t[ 0]; + // v[ 2] += v[ 1] < t[ 1]; + // v[ 3] += v[ 2] < t[ 2]; + // v[ 4] += v[ 3] < t[ 3]; + // v[ 5] += v[ 4] < t[ 4]; + // v[ 6] += v[ 5] < t[ 5]; + // v[ 7] += v[ 6] < t[ 6]; + // v[ 8] += v[ 7] < t[ 7]; + // v[ 9] += v[ 8] < t[ 8]; + // v[10] += v[ 9] < t[ 9]; + // v[11] += v[10] < t[10]; + // v[12] += v[11] < t[11]; + // v[13] += v[12] < t[12]; + // v[14] += v[13] < t[13]; + // v[15] += v[14] < t[14]; + // v[16] += v[15] < t[15]; + // v[17] += v[16] < t[16]; + // v[18] += v[17] < t[17]; + // v[19] += v[18] < t[18]; + // v[20] += v[19] < t[19]; + // v[21] += v[20] < t[20]; + // v[22] += v[21] < t[21]; + // v[23] += v[22] < t[22]; + // v[24] += v[23] < t[23]; + // v[25] += v[24] < t[24]; + // v[26] += v[25] < t[25]; + // v[27] += v[26] < t[26]; + // v[28] += v[27] < t[27]; + // v[29] += v[28] < t[28]; + // v[30] += v[29] < t[29]; + // v[31] += v[30] < t[30]; + // v[32] += v[31] < t[31]; + // v[33] += v[32] < t[32]; + // v[34] += v[33] < t[33]; + // v[35] += v[34] < t[34]; + // v[36] += v[35] < t[35]; + // v[37] += v[36] < t[36]; + // v[38] += v[37] < t[37]; + // v[39] += v[38] < t[38]; + // v[40] += v[39] < t[39]; + // v[41] += v[40] < t[40]; + // v[42] += v[41] < t[41]; + // v[43] += v[42] < t[42]; + // v[44] += v[43] < t[43]; + // v[45] += v[44] < t[44]; + // v[46] += v[45] < t[45]; + // v[47] += v[46] < t[46]; + // v[48] += v[47] < t[47]; + // v[49] += v[48] < t[48]; + // v[50] += v[49] < t[49]; + // v[51] += v[50] < t[50]; + // v[52] += v[51] < t[51]; + // v[53] += v[52] < t[52]; + // v[54] += v[53] < t[53]; + // v[55] += v[54] < t[54]; + // v[56] += v[55] < t[55]; + // v[57] += v[56] < t[56]; + // v[58] += v[57] < t[57]; + // v[59] += v[58] < t[58]; + // v[60] += v[59] < t[59]; + // v[61] += v[60] < t[60]; + // v[62] += v[61] < t[61]; + // v[63] += v[62] < t[62]; + // v[64] += v[63] < t[63]; + } + } + + // v is fact and we skip v[64] in output ??? 
+ + v[64] = 0; +} + +// r[ 64], the result, r = (a << bits) % m +// a[ 64], the 256 byte number we want to convert +// m[128], modulo + +DECLSPEC void to_montgomery (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *a, PRIVATE_AS const u32 *m) +{ + u32 t[128] = { 0 }; + + // shift by "bits" bits, i.e. 256 bytes (64 u32) + + // for (u32 i = 0; i < 64; i++) t[i + 64] = a[i]; + + t[ 64] = a[ 0]; t[ 65] = a[ 1]; t[ 66] = a[ 2]; t[ 67] = a[ 3]; + t[ 68] = a[ 4]; t[ 69] = a[ 5]; t[ 70] = a[ 6]; t[ 71] = a[ 7]; + t[ 72] = a[ 8]; t[ 73] = a[ 9]; t[ 74] = a[10]; t[ 75] = a[11]; + t[ 76] = a[12]; t[ 77] = a[13]; t[ 78] = a[14]; t[ 79] = a[15]; + t[ 80] = a[16]; t[ 81] = a[17]; t[ 82] = a[18]; t[ 83] = a[19]; + t[ 84] = a[20]; t[ 85] = a[21]; t[ 86] = a[22]; t[ 87] = a[23]; + t[ 88] = a[24]; t[ 89] = a[25]; t[ 90] = a[26]; t[ 91] = a[27]; + t[ 92] = a[28]; t[ 93] = a[29]; t[ 94] = a[30]; t[ 95] = a[31]; + t[ 96] = a[32]; t[ 97] = a[33]; t[ 98] = a[34]; t[ 99] = a[35]; + t[100] = a[36]; t[101] = a[37]; t[102] = a[38]; t[103] = a[39]; + t[104] = a[40]; t[105] = a[41]; t[106] = a[42]; t[107] = a[43]; + t[108] = a[44]; t[109] = a[45]; t[110] = a[46]; t[111] = a[47]; + t[112] = a[48]; t[113] = a[49]; t[114] = a[50]; t[115] = a[51]; + t[116] = a[52]; t[117] = a[53]; t[118] = a[54]; t[119] = a[55]; + t[120] = a[56]; t[121] = a[57]; t[122] = a[58]; t[123] = a[59]; + t[124] = a[60]; t[125] = a[61]; t[126] = a[62]; t[127] = a[63]; + + mod_4096 (t, m); + + // for (u32 i = 0; i < 64; i++) r[i] = t[i]; + + r[ 0] = t[ 0]; r[ 1] = t[ 1]; r[ 2] = t[ 2]; r[ 3] = t[ 3]; + r[ 4] = t[ 4]; r[ 5] = t[ 5]; r[ 6] = t[ 6]; r[ 7] = t[ 7]; + r[ 8] = t[ 8]; r[ 9] = t[ 9]; r[10] = t[10]; r[11] = t[11]; + r[12] = t[12]; r[13] = t[13]; r[14] = t[14]; r[15] = t[15]; + r[16] = t[16]; r[17] = t[17]; r[18] = t[18]; r[19] = t[19]; + r[20] = t[20]; r[21] = t[21]; r[22] = t[22]; r[23] = t[23]; + r[24] = t[24]; r[25] = t[25]; r[26] = t[26]; r[27] = t[27]; + r[28] = t[28]; r[29] = t[29]; r[30] = t[30]; r[31] = t[31]; + 
r[32] = t[32]; r[33] = t[33]; r[34] = t[34]; r[35] = t[35]; + r[36] = t[36]; r[37] = t[37]; r[38] = t[38]; r[39] = t[39]; + r[40] = t[40]; r[41] = t[41]; r[42] = t[42]; r[43] = t[43]; + r[44] = t[44]; r[45] = t[45]; r[46] = t[46]; r[47] = t[47]; + r[48] = t[48]; r[49] = t[49]; r[50] = t[50]; r[51] = t[51]; + r[52] = t[52]; r[53] = t[53]; r[54] = t[54]; r[55] = t[55]; + r[56] = t[56]; r[57] = t[57]; r[58] = t[58]; r[59] = t[59]; + r[60] = t[60]; r[61] = t[61]; r[62] = t[62]; r[63] = t[63]; +} + +// r [ 64], result, r = a * (rinv * 2) % m +// a [ 64], the 256 byte number we want to convert back +// m [128], modulo +// rinv[ 64], r', our euclidean inverse r + +DECLSPEC void from_montgomery (PRIVATE_AS u32 *r, PRIVATE_AS const u32* a, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *rinv) +{ + // (a * (rinv * 2)) % m = (a * rinv2) % m + + // rinv2 = 2 * rinv (or rinv << 1) + + u32 rinv2[128] = { 0 }; + + // rinv2[64] = rinv[63] >> 31; + + // for (int i = 63; i > 0; i--) rinv2[i] = (rinv[i - 1] >> 31) | (rinv[i] << 1); + + // rinv2[0] = rinv[0] << 1; + + rinv2[64] = (rinv[63] >> 31); + rinv2[63] = (rinv[62] >> 31) | (rinv[63] << 1); + rinv2[62] = (rinv[61] >> 31) | (rinv[62] << 1); + rinv2[61] = (rinv[60] >> 31) | (rinv[61] << 1); + rinv2[60] = (rinv[59] >> 31) | (rinv[60] << 1); + rinv2[59] = (rinv[58] >> 31) | (rinv[59] << 1); + rinv2[58] = (rinv[57] >> 31) | (rinv[58] << 1); + rinv2[57] = (rinv[56] >> 31) | (rinv[57] << 1); + rinv2[56] = (rinv[55] >> 31) | (rinv[56] << 1); + rinv2[55] = (rinv[54] >> 31) | (rinv[55] << 1); + rinv2[54] = (rinv[53] >> 31) | (rinv[54] << 1); + rinv2[53] = (rinv[52] >> 31) | (rinv[53] << 1); + rinv2[52] = (rinv[51] >> 31) | (rinv[52] << 1); + rinv2[51] = (rinv[50] >> 31) | (rinv[51] << 1); + rinv2[50] = (rinv[49] >> 31) | (rinv[50] << 1); + rinv2[49] = (rinv[48] >> 31) | (rinv[49] << 1); + rinv2[48] = (rinv[47] >> 31) | (rinv[48] << 1); + rinv2[47] = (rinv[46] >> 31) | (rinv[47] << 1); + rinv2[46] = (rinv[45] >> 31) | (rinv[46] << 1); + 
rinv2[45] = (rinv[44] >> 31) | (rinv[45] << 1); + rinv2[44] = (rinv[43] >> 31) | (rinv[44] << 1); + rinv2[43] = (rinv[42] >> 31) | (rinv[43] << 1); + rinv2[42] = (rinv[41] >> 31) | (rinv[42] << 1); + rinv2[41] = (rinv[40] >> 31) | (rinv[41] << 1); + rinv2[40] = (rinv[39] >> 31) | (rinv[40] << 1); + rinv2[39] = (rinv[38] >> 31) | (rinv[39] << 1); + rinv2[38] = (rinv[37] >> 31) | (rinv[38] << 1); + rinv2[37] = (rinv[36] >> 31) | (rinv[37] << 1); + rinv2[36] = (rinv[35] >> 31) | (rinv[36] << 1); + rinv2[35] = (rinv[34] >> 31) | (rinv[35] << 1); + rinv2[34] = (rinv[33] >> 31) | (rinv[34] << 1); + rinv2[33] = (rinv[32] >> 31) | (rinv[33] << 1); + rinv2[32] = (rinv[31] >> 31) | (rinv[32] << 1); + rinv2[31] = (rinv[30] >> 31) | (rinv[31] << 1); + rinv2[30] = (rinv[29] >> 31) | (rinv[30] << 1); + rinv2[29] = (rinv[28] >> 31) | (rinv[29] << 1); + rinv2[28] = (rinv[27] >> 31) | (rinv[28] << 1); + rinv2[27] = (rinv[26] >> 31) | (rinv[27] << 1); + rinv2[26] = (rinv[25] >> 31) | (rinv[26] << 1); + rinv2[25] = (rinv[24] >> 31) | (rinv[25] << 1); + rinv2[24] = (rinv[23] >> 31) | (rinv[24] << 1); + rinv2[23] = (rinv[22] >> 31) | (rinv[23] << 1); + rinv2[22] = (rinv[21] >> 31) | (rinv[22] << 1); + rinv2[21] = (rinv[20] >> 31) | (rinv[21] << 1); + rinv2[20] = (rinv[19] >> 31) | (rinv[20] << 1); + rinv2[19] = (rinv[18] >> 31) | (rinv[19] << 1); + rinv2[18] = (rinv[17] >> 31) | (rinv[18] << 1); + rinv2[17] = (rinv[16] >> 31) | (rinv[17] << 1); + rinv2[16] = (rinv[15] >> 31) | (rinv[16] << 1); + rinv2[15] = (rinv[14] >> 31) | (rinv[15] << 1); + rinv2[14] = (rinv[13] >> 31) | (rinv[14] << 1); + rinv2[13] = (rinv[12] >> 31) | (rinv[13] << 1); + rinv2[12] = (rinv[11] >> 31) | (rinv[12] << 1); + rinv2[11] = (rinv[10] >> 31) | (rinv[11] << 1); + rinv2[10] = (rinv[ 9] >> 31) | (rinv[10] << 1); + rinv2[ 9] = (rinv[ 8] >> 31) | (rinv[ 9] << 1); + rinv2[ 8] = (rinv[ 7] >> 31) | (rinv[ 8] << 1); + rinv2[ 7] = (rinv[ 6] >> 31) | (rinv[ 7] << 1); + rinv2[ 6] = (rinv[ 5] >> 31) | (rinv[ 6] << 1); + 
rinv2[ 5] = (rinv[ 4] >> 31) | (rinv[ 5] << 1); + rinv2[ 4] = (rinv[ 3] >> 31) | (rinv[ 4] << 1); + rinv2[ 3] = (rinv[ 2] >> 31) | (rinv[ 3] << 1); + rinv2[ 2] = (rinv[ 1] >> 31) | (rinv[ 2] << 1); + rinv2[ 1] = (rinv[ 0] >> 31) | (rinv[ 1] << 1); + rinv2[ 0] = (rinv[ 0] << 1); + + mod_4096 (rinv2, m); + + + // t = a * rinv2 + + u32 t[128] = { 0 }; + + mul (t, a, rinv2); + + + // t %= m + + mod_4096 (t, m); + + // for (u32 i = 0; i < 64; i++) r[i] = t[i]; + + r[ 0] = t[ 0]; r[ 1] = t[ 1]; r[ 2] = t[ 2]; r[ 3] = t[ 3]; + r[ 4] = t[ 4]; r[ 5] = t[ 5]; r[ 6] = t[ 6]; r[ 7] = t[ 7]; + r[ 8] = t[ 8]; r[ 9] = t[ 9]; r[10] = t[10]; r[11] = t[11]; + r[12] = t[12]; r[13] = t[13]; r[14] = t[14]; r[15] = t[15]; + r[16] = t[16]; r[17] = t[17]; r[18] = t[18]; r[19] = t[19]; + r[20] = t[20]; r[21] = t[21]; r[22] = t[22]; r[23] = t[23]; + r[24] = t[24]; r[25] = t[25]; r[26] = t[26]; r[27] = t[27]; + r[28] = t[28]; r[29] = t[29]; r[30] = t[30]; r[31] = t[31]; + r[32] = t[32]; r[33] = t[33]; r[34] = t[34]; r[35] = t[35]; + r[36] = t[36]; r[37] = t[37]; r[38] = t[38]; r[39] = t[39]; + r[40] = t[40]; r[41] = t[41]; r[42] = t[42]; r[43] = t[43]; + r[44] = t[44]; r[45] = t[45]; r[46] = t[46]; r[47] = t[47]; + r[48] = t[48]; r[49] = t[49]; r[50] = t[50]; r[51] = t[51]; + r[52] = t[52]; r[53] = t[53]; r[54] = t[54]; r[55] = t[55]; + r[56] = t[56]; r[57] = t[57]; r[58] = t[58]; r[59] = t[59]; + r[60] = t[60]; r[61] = t[61]; r[62] = t[62]; r[63] = t[63]; +} diff --git a/OpenCL/inc_bignum_operations.h b/OpenCL/inc_bignum_operations.h new file mode 100644 index 000000000..ff20bc257 --- /dev/null +++ b/OpenCL/inc_bignum_operations.h @@ -0,0 +1,22 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_BIGNUM_OPERATIONS_H +#define _INC_BIGNUM_OPERATIONS_H + +DECLSPEC void mod_4096 (PRIVATE_AS u32 *n, PRIVATE_AS const u32 *m); +DECLSPEC void mul (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *x, PRIVATE_AS const u32 *y); +DECLSPEC void mul_masked (PRIVATE_AS u32 *r, 
PRIVATE_AS const u32 *x, PRIVATE_AS const u32 *y); +DECLSPEC void mul_mod (PRIVATE_AS u32 *x, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *fact); + +DECLSPEC void pow_mod_precomp_g (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *b_pre, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *fact); +DECLSPEC void pow_mod (PRIVATE_AS u32 *r, PRIVATE_AS u32 *x, PRIVATE_AS const u32 *y, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *fact); + +DECLSPEC void simple_euclidean_gcd (PRIVATE_AS u32 *u, PRIVATE_AS u32 *v, PRIVATE_AS const u32 *m); + +DECLSPEC void to_montgomery (PRIVATE_AS u32 *r, PRIVATE_AS const u32 *a, PRIVATE_AS const u32 *m); +DECLSPEC void from_montgomery (PRIVATE_AS u32 *r, PRIVATE_AS const u32* a, PRIVATE_AS const u32 *m, PRIVATE_AS const u32 *rinv); + +#endif // _INC_BIGNUM_OPERATIONS_H diff --git a/OpenCL/inc_radmin3_constants.h b/OpenCL/inc_radmin3_constants.h new file mode 100644 index 000000000..62d7e2388 --- /dev/null +++ b/OpenCL/inc_radmin3_constants.h @@ -0,0 +1,73 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_RADMIN3_CONSTANTS_H +#define _INC_RADMIN3_CONSTANTS_H + +#define PRECOMP_BITS 10 +#define PRECOMP_VECLEN 160 // SHA1 hash (in bits, 20 bytes) +#define PRECOMP_SLOTS (PRECOMP_VECLEN / PRECOMP_BITS) +#define PRECOMP_ENTRIES (1 << PRECOMP_BITS) // 2 ^ PRECOMP_BITS +#define PRECOMP_ENTRYLEN 256 / 4 // data len in u32 (therefore divided by 4 bytes) +#define PRECOMP_DATALEN (PRECOMP_ENTRIES - 1) * PRECOMP_SLOTS * PRECOMP_ENTRYLEN +#define PRECOMP_MASK 0xffffffff >> (32 - PRECOMP_BITS) + +CONSTANT_VK u32 RADMIN3_M[128] = +{ + 0x740a682f, 0x7b379fd7, 0x4af5b8d3, 0xd70b2bca, 0xd7f51544, 0xe5a4ccf5, + 0x24c9e5a2, 0x30ebc4fd, 0x40a71f19, 0xfaa0c43b, 0x2bfe7dba, 0xac9278b9, + 0x97245bd2, 0x9c4d18f6, 0x89dfd06c, 0x6091acea, 0x8ba2332d, 0x2cfb3b52, + 0x5350dec7, 0x34fd0fd4, 0x125db8ad, 0x149167ea, 0x01f560dd, 0x0fdbaf46, + 0x6b0c6bdb, 0xc5473fdb, 0x36fc4f80, 0xb8f90dea, 
  0x48598f2c, 0x590387f5,
  0xe63ec4b3, 0x021c881a, 0xa0d18a79, 0xbdd952f7, 0xe99e6127, 0xedbba5ea,
  0x102fc6ce, 0xf8bdd56a, 0x66261cd3, 0xdb701022, 0x0db4db96, 0x2d7cdf81,
  0x2cbdd4ac, 0xca9f0806, 0x5503342e, 0x81a32da9, 0xfd0f9e29, 0xffd8df96,
  0x4dd09947, 0xef2fcd2a, 0x0dfe4178, 0x86b0dbe6, 0x92cf3357, 0x8e0c3bff,
  0x53e36fb6, 0xcafca04a, 0x3e2cec58, 0x13b406f2, 0xd4304b01, 0xaec0b980,
  0x587d8f77, 0x5d02f19d, 0x0f891dfd, 0x9847fc7e, 0x00000000, 0x00000000,
  // upper 64 limbs: zero padding only (the modulus itself is 2048 bit)
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000,
};

// 2048-bit constant, same little-endian limb layout as RADMIN3_M.
// NOTE(review): presumably the Montgomery inverse r' passed as "rinv" to
// from_montgomery () — confirm against the kernel call sites.

CONSTANT_VK u32 RADMIN3_R[64] =
{
  0x8bf597d1, 0x84c86028, 0xb50a472c, 0x28f4d435, 0x280aeabb, 0x1a5b330a,
  0xdb361a5d, 0xcf143b02, 0xbf58e0e6, 0x055f3bc4, 0xd4018245, 0x536d8746,
  0x68dba42d, 0x63b2e709, 0x76202f93, 0x9f6e5315, 0x745dccd2, 0xd304c4ad,
  0xacaf2138, 0xcb02f02b, 0xeda24752, 0xeb6e9815, 0xfe0a9f22, 0xf02450b9,
  0x94f39424, 0x3ab8c024, 0xc903b07f, 0x4706f215, 0xb7a670d3, 0xa6fc780a,
  0x19c13b4c, 0xfde377e5, 0x5f2e7586, 0x4226ad08, 0x16619ed8, 0x12445a15,
  0xefd03931, 0x07422a95, 0x99d9e32c, 0x248fefdd, 0xf24b2469, 0xd283207e,
  0xd3422b53, 0x3560f7f9, 0xaafccbd1, 0x7e5cd256, 0x02f061d6, 0x00272069,
  0xb22f66b8, 0x10d032d5, 0xf201be87, 0x794f2419, 0x6d30cca8, 0x71f3c400,
  0xac1c9049, 0x35035fb5, 0xc1d313a7, 0xec4bf90d, 0x2bcfb4fe, 0x513f467f,
  0xa7827088, 0xa2fd0e62, 0xf076e202, 0x67b80381,
};

// 2048-bit constant, same layout.
// NOTE(review): looks like the "fact" reduction factor taken by
// mul_mod ()/pow_mod () (see inc_bignum_operations.h) — confirm.

CONSTANT_VK u32 RADMIN3_FACT[64] =
{
  0xfdac2131, 0x56654c77, 0x3e4a8d19, 0x5a1a861c, 0x7906adbd, 0x89d806d0,
  0x26a253a8, 0xcb8c8be2, 0xb62eb887, 0xa2364fb0, 0xbff19140, 0x79aa7301,
  0x78ee9576, 0x554ba4e6, 0xa4f3efd8, 0x7637a767, 0xb413facb, 0xf333933b,
  0xb50a03bc, 0xf100305f, 0x0e3d5ca2, 0x57c949a8, 0x4e73b61d, 0xced3203c,
  0x578439b1, 0x37ed2593, 0x8fb7c6d4, 0x7cb1f2ab, 0xc035c148, 0xd9defd19,
  0x04bf254a, 0xcb970a46, 0xf2a7960c, 0x69a651f6, 0x6adc8010, 0x9e05042d,
  0x4e56ef9d, 0x595b31c9, 0x3f455d15, 0xf703c3e3, 0x3f164848, 0xe4bd5f3a,
  0x62101b16, 0x01138387, 0xf346380e, 0x2358d5cd, 0xc839e279, 0xa31123b1,
  0x45f240ea, 0xe39f2352, 0x91e590cb, 0x6d11e378, 0x04e89126, 0x904b2390,
  0xa11b556d, 0xb6d7dcfb, 0x6e826c53, 0x1392b6e4, 0xa76eefe9, 0x6c770e4d,
  0x0312ac4d, 0x73aa4ff4, 0x39ad3b1f, 0xfad6fce6,
};

#endif // _INC_RADMIN3_CONSTANTS_H
diff --git a/OpenCL/m29200_a0-optimized.cl b/OpenCL/m29200_a0-optimized.cl
new file mode 100644
index 000000000..3c763de1f
--- /dev/null
+++ b/OpenCL/m29200_a0-optimized.cl
@@ -0,0 +1,1086 @@
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv:
 * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html
 */


#define NEW_SIMD_CODE

#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#include M2S(INCLUDE_PATH/inc_bignum_operations.cl)
#include M2S(INCLUDE_PATH/inc_radmin3_constants.h)
#endif

// per-digest esalt: the username (the kernel byte-swaps it and prepends it
// as salt) plus the precomputed-powers table indexed via PRECOMP_* above

typedef struct radmin3
{
  u32 user[64];
  u32 user_len;

  u32
pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +KERNEL_FQ void m29200_m04 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]); + salt2_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]); + salt2_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]); + salt2_buf0[3] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]); + salt2_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]); + salt2_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]); + salt2_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]); + salt2_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]); + salt2_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]); + salt2_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]); + salt2_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[10]); + salt2_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[11]); + salt2_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[12]); + salt2_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[13]); + salt2_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[14]); + salt2_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[15]); + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len2 + salt2_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt2_len); + + w0[0] |= salt2_buf0[0]; + w0[1] |= salt2_buf0[1]; + w0[2] |= salt2_buf0[2]; + w0[3] |= salt2_buf0[3]; + w1[0] |= salt2_buf1[0]; + w1[1] |= salt2_buf1[1]; + w1[2] |= salt2_buf1[2]; + w1[3] |= salt2_buf1[3]; + w2[0] |= salt2_buf2[0]; + w2[1] |= salt2_buf2[1]; + w2[2] |= salt2_buf2[2]; + w2[3] |= salt2_buf2[3]; + w3[0] |= salt2_buf3[0]; + w3[1] |= salt2_buf3[1]; + w3[2] |= salt2_buf3[2]; + w3[3] |= salt2_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + 
*/ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = 
hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 
1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, 
d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = 
a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); 
SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 
((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 }; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for 
(u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_m08 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ // NOTE(review): intentionally left empty — presumably the optimized (-O) pw_len 8..15 variant is never dispatched for this mode; confirm against the mode's module/plugin configuration +} + +KERNEL_FQ void m29200_m16 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ // NOTE(review): intentionally left empty — presumably the optimized (-O) pw_len 16+ variant is never dispatched for this mode; confirm against the mode's module/plugin configuration +} + +KERNEL_FQ void m29200_s04 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = 
salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]); + salt2_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]); + salt2_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]); + salt2_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]); + salt2_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]); + salt2_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]); + salt2_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]); + salt2_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]); + salt2_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]); + salt2_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]); + salt2_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[10]); + salt2_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[11]); + salt2_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[12]); + salt2_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[13]); + salt2_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[14]); + salt2_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[15]); + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + 
+ make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + /** + * prepend salt + */ + + const u32x out_salt_len = out_len2 + salt2_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt2_len); + + w0[0] |= salt2_buf0[0]; + w0[1] |= salt2_buf0[1]; + w0[2] |= salt2_buf0[2]; + w0[3] |= salt2_buf0[3]; + w1[0] |= salt2_buf1[0]; + w1[1] |= salt2_buf1[1]; + w1[2] |= salt2_buf1[2]; + w1[3] |= salt2_buf1[3]; + w2[0] |= salt2_buf2[0]; + w2[1] |= salt2_buf2[1]; + w2[2] |= salt2_buf2[2]; + w2[3] |= salt2_buf2[3]; + w3[0] |= salt2_buf3[0]; + w3[1] |= salt2_buf3[1]; + w3[2] |= salt2_buf3[2]; + w3[3] |= salt2_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, 
e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ 
wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); 
SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ 
wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP 
(SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, 
wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += 
make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 }; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_s08 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ +} + +KERNEL_FQ void m29200_s16 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ +} diff --git a/OpenCL/m29200_a0-pure.cl b/OpenCL/m29200_a0-pure.cl new file mode 100644 index 000000000..1f540c3a5 --- /dev/null +++ b/OpenCL/m29200_a0-pure.cl @@ -0,0 +1,470 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp.h) +#include M2S(INCLUDE_PATH/inc_rp.cl) +#include 
M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha1.cl) +#include M2S(INCLUDE_PATH/inc_bignum_operations.cl) +#include M2S(INCLUDE_PATH/inc_radmin3_constants.h) +#endif + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +KERNEL_FQ void m29200_mxx (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * base + */ + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + + // ctx1 with main salt + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + + // add password to the user name (and colon, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_utf16le_swap (&c0, tmp.i, tmp.pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + sha1_ctx_t c1 = ctx1; + + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + u32 w2[4] = { 0 }; + u32 w3[4] = { 0 }; + + w0[0] = c0.h[0]; + w0[1] = c0.h[1]; + w0[2] = c0.h[2]; + w0[3] = c0.h[3]; + w1[0] = c0.h[4]; + + sha1_update_64 (&c1, w0, w1, w2, w3, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for 
(u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_sxx (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + + // ctx1 with main salt + + 
sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + + // add password to the user name (and colon, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_utf16le_swap (&c0, tmp.i, tmp.pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + sha1_ctx_t c1 = ctx1; + + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + u32 w2[4] = { 0 }; + u32 w3[4] = { 0 }; + + w0[0] = c0.h[0]; + w0[1] = c0.h[1]; + w0[2] = c0.h[2]; + w0[3] = c0.h[3]; + w1[0] = c0.h[4]; + + sha1_update_64 (&c1, w0, w1, w2, w3, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for (u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) 
continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m29200_a1-optimized.cl b/OpenCL/m29200_a1-optimized.cl new file mode 100644 index 000000000..48f4424ca --- 
/dev/null +++ b/OpenCL/m29200_a1-optimized.cl @@ -0,0 +1,1211 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp_optimized.h) +#include M2S(INCLUDE_PATH/inc_rp_optimized.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha1.cl) +#include M2S(INCLUDE_PATH/inc_bignum_operations.cl) +#include M2S(INCLUDE_PATH/inc_radmin3_constants.h) +#endif + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + + +KERNEL_FQ void m29200_m04 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]); + salt2_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]); + salt2_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]); + salt2_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]); + salt2_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]); + salt2_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]); + salt2_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]); + salt2_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]); + salt2_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]); + salt2_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]); + salt2_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[10]); + salt2_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[11]); + salt2_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[12]); + salt2_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[13]); + salt2_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[14]); + salt2_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[15]); + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = 
pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt + */ + + const u32x pw_salt_len = pw_len2 + salt2_len; + + 
switch_buffer_by_offset_le (w0, w1, w2, w3, salt2_len); + + w0[0] |= salt2_buf0[0]; + w0[1] |= salt2_buf0[1]; + w0[2] |= salt2_buf0[2]; + w0[3] |= salt2_buf0[3]; + w1[0] |= salt2_buf1[0]; + w1[1] |= salt2_buf1[1]; + w1[2] |= salt2_buf1[2]; + w1[3] |= salt2_buf1[3]; + w2[0] |= salt2_buf2[0]; + w2[1] |= salt2_buf2[1]; + w2[2] |= salt2_buf2[2]; + w2[3] |= salt2_buf2[3]; + w3[0] |= salt2_buf3[0]; + w3[1] |= salt2_buf3[1]; + w3[2] |= salt2_buf3[2]; + w3[3] |= salt2_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, 
b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t 
^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); 
+ we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + 
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ 
w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 }; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + 
exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_m08 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ /* intentionally empty: no 8-wide vector implementation is provided for this hash-mode (placeholder so the kernel symbol exists) — presumably the mode is restricted to the -m04/-s04 variants; TODO confirm against the mode's module source */ +} + +KERNEL_FQ void m29200_m16 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ /* intentionally empty: no 16-wide vector implementation is provided for this hash-mode (placeholder so the kernel symbol exists) */ +} + +KERNEL_FQ void m29200_s04 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + /* cooperative fill: each work-item copies a strided share of the 64-word modulus, residue and factor tables into LOCAL memory */ + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + /** + * digest + */ + + /* target digest words loaded for the single-hash comparison path */ + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + SYNC_THREADS (); /* barrier placed before the gid bounds check so every work-item (including out-of-range ones) reaches it after the LOCAL fills above */ + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = 
pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]); + salt2_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]); + salt2_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]); + salt2_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]); + salt2_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]); + salt2_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]); + salt2_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]); + salt2_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]); + salt2_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]); + salt2_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]); + salt2_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[10]); + salt2_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[11]); + salt2_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[12]); + salt2_buf3[1] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET_HOST].user[13]); + salt2_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[14]); + salt2_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET_HOST].user[15]); + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = 
wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt + */ + + const u32x pw_salt_len = pw_len2 + salt2_len; + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt2_len); + + w0[0] |= salt2_buf0[0]; + w0[1] |= salt2_buf0[1]; + w0[2] |= salt2_buf0[2]; + w0[3] |= salt2_buf0[3]; + w1[0] |= salt2_buf1[0]; + w1[1] |= salt2_buf1[1]; + w1[2] |= salt2_buf1[2]; + w1[3] |= salt2_buf1[3]; + w2[0] |= salt2_buf2[0]; + w2[1] |= salt2_buf2[1]; + w2[2] |= salt2_buf2[2]; + w2[3] |= salt2_buf2[3]; + w3[0] |= salt2_buf3[0]; + w3[1] |= salt2_buf3[1]; + w3[2] |= salt2_buf3[2]; + w3[3] |= salt2_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, 
c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t 
^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP 
(SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + 
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ 
w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, 
b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = 
hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 
1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 }; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 
6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_s08 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ +} + +KERNEL_FQ void m29200_s16 (KERN_ATTR_RULES_ESALT (radmin3_t)) +{ +} diff --git a/OpenCL/m29200_a1-pure.cl b/OpenCL/m29200_a1-pure.cl new file mode 100644 index 000000000..b9a9f0da7 --- /dev/null +++ b/OpenCL/m29200_a1-pure.cl @@ -0,0 +1,479 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include 
M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha1.cl) +#include M2S(INCLUDE_PATH/inc_bignum_operations.cl) +#include M2S(INCLUDE_PATH/inc_radmin3_constants.h) +#endif + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +KERNEL_FQ void m29200_mxx (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + sha1_update_utf16le_swap (&ctx0, w, pw_len); + + + // ctx1 with main salt + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + // add password to the user name (and colon and first part of the password, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_global_utf16le_swap (&c0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + sha1_ctx_t c1 = ctx1; + + u32 w0_t[4] = { 0 }; + u32 w1_t[4] = { 0 }; + u32 w2_t[4] = { 0 }; + u32 w3_t[4] = { 0 }; + + w0_t[0] = c0.h[0]; 
+ w0_t[1] = c0.h[1]; + w0_t[2] = c0.h[2]; + w0_t[3] = c0.h[3]; + w1_t[0] = c0.h[4]; + + sha1_update_64 (&c1, w0_t, w1_t, w2_t, w3_t, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for (u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_sxx (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + sha1_update_utf16le_swap (&ctx0, w, pw_len); + + + // ctx1 with main salt + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + // add password to the user name (and colon and first part of the password, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_global_utf16le_swap (&c0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + sha1_ctx_t c1 = ctx1; + + u32 w0_t[4] = { 0 }; + u32 w1_t[4] = { 0 }; + u32 w2_t[4] = { 0 }; + u32 w3_t[4] = { 0 }; + + w0_t[0] = c0.h[0]; + w0_t[1] = c0.h[1]; + w0_t[2] = c0.h[2]; + w0_t[3] = c0.h[3]; + w1_t[0] = c0.h[4]; + + sha1_update_64 (&c1, w0_t, w1_t, w2_t, w3_t, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for (u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 
0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m29200_a3-optimized.cl b/OpenCL/m29200_a3-optimized.cl new file mode 100644 index 000000000..b4ac9ed9e --- /dev/null +++ b/OpenCL/m29200_a3-optimized.cl @@ -0,0 +1,1344 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp_optimized.h) +#include M2S(INCLUDE_PATH/inc_rp_optimized.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha1.cl) +#include M2S(INCLUDE_PATH/inc_bignum_operations.cl) +#include M2S(INCLUDE_PATH/inc_radmin3_constants.h) +#endif + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +DECLSPEC void m29200m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_ESALT (radmin3_t)) +{ + /** + * modifiers are taken from args + */ + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 
salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]; + salt2_buf0[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]; + salt2_buf0[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]; + salt2_buf0[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]; + salt2_buf1[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]; + salt2_buf1[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]; + salt2_buf1[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]; + salt2_buf1[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]; + salt2_buf2[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]; + salt2_buf2[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]; + salt2_buf2[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[10]; + salt2_buf2[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[11]; + salt2_buf3[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[12]; + salt2_buf3[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[13]; + salt2_buf3[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[14]; + salt2_buf3[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[15]; + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + const u32 pw_salt_len = pw_len + salt2_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, 
il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + switch_buffer_by_offset_be (t0, t1, t2, t3, salt2_len); + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + t0[0] |= salt2_buf0[0]; + t0[1] |= salt2_buf0[1]; + t0[2] |= salt2_buf0[2]; + t0[3] |= salt2_buf0[3]; + t1[0] |= salt2_buf1[0]; + t1[1] |= salt2_buf1[1]; + t1[2] |= salt2_buf1[2]; + t1[3] |= salt2_buf1[3]; + t2[0] |= salt2_buf2[0]; + t2[1] |= salt2_buf2[1]; + t2[2] |= salt2_buf2[2]; + t2[3] |= salt2_buf2[3]; + t3[0] |= salt2_buf3[0]; + t3[1] |= salt2_buf3[1]; + t3[2] = 0; + t3[3] = pw_salt_len * 8; + + /** + * sha1 + */ + + u32x w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP 
(SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = 
hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ 
w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, 
a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = 
hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 
1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, 
b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x 
(SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 }; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m29200s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_ESALT (radmin3_t)) +{ + /** + * modifiers are taken from args + */ + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + /** + * salt + */ + + u32 salt1_buf0[4]; + u32 salt1_buf1[4]; + u32 salt1_buf2[4]; + u32 salt1_buf3[4]; + + salt1_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt1_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt1_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt1_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + salt1_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[4]; + salt1_buf1[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[5]; + salt1_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[6]; + salt1_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[7]; + salt1_buf2[0] = 0; + salt1_buf2[1] = 0; + salt1_buf2[2] = 0; + salt1_buf2[3] = 0; + salt1_buf3[0] = 0; + salt1_buf3[1] = 0; + salt1_buf3[2] = 0; + salt1_buf3[3] = 0; + + u32 salt2_buf0[4]; + u32 salt2_buf1[4]; + u32 salt2_buf2[4]; + u32 salt2_buf3[4]; + + salt2_buf0[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 0]; + salt2_buf0[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 1]; + salt2_buf0[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 2]; + salt2_buf0[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 3]; + salt2_buf1[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 4]; + salt2_buf1[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 5]; + salt2_buf1[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 6]; + salt2_buf1[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 7]; + salt2_buf2[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 8]; + salt2_buf2[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[ 9]; + salt2_buf2[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[10]; + salt2_buf2[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[11]; + salt2_buf3[0] = esalt_bufs[DIGESTS_OFFSET_HOST].user[12]; + salt2_buf3[1] = esalt_bufs[DIGESTS_OFFSET_HOST].user[13]; + salt2_buf3[2] = esalt_bufs[DIGESTS_OFFSET_HOST].user[14]; + salt2_buf3[3] = esalt_bufs[DIGESTS_OFFSET_HOST].user[15]; + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].user_len; + + const u32 pw_salt_len = pw_len + salt2_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x 
t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + switch_buffer_by_offset_be (t0, t1, t2, t3, salt2_len); + + t0[0] |= salt2_buf0[0]; + t0[1] |= salt2_buf0[1]; + t0[2] |= salt2_buf0[2]; + t0[3] |= salt2_buf0[3]; + t1[0] |= salt2_buf1[0]; + t1[1] |= salt2_buf1[1]; + t1[2] |= salt2_buf1[2]; + t1[3] |= salt2_buf1[3]; + t2[0] |= salt2_buf2[0]; + t2[1] |= salt2_buf2[1]; + t2[2] |= salt2_buf2[2]; + t2[3] |= salt2_buf2[3]; + t3[0] |= salt2_buf3[0]; + t3[1] |= salt2_buf3[1]; + t3[2] = 0; + t3[3] = pw_salt_len * 8; + + /** + * sha1 + */ + + u32x w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = 
hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 
1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, 
a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 
((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + w0_t = salt1_buf0[0]; + w1_t = salt1_buf0[1]; + w2_t = salt1_buf0[2]; + w3_t = salt1_buf0[3]; + w4_t = salt1_buf1[0]; + w5_t = salt1_buf1[1]; + w6_t = salt1_buf1[2]; + w7_t = salt1_buf1[3]; + w8_t = a; + w9_t = b; + wa_t = c; + wb_t = d; + wc_t = e; + wd_t = 0x80000000; + we_t = 0; + wf_t = (32 + 20) * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 
((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += make_u32x (SHA1M_A); + b += make_u32x (SHA1M_B); + c += make_u32x (SHA1M_C); + d += make_u32x (SHA1M_D); + e += make_u32x (SHA1M_E); + + u32 exponent[5] = { 0 
}; + + exponent[0] = e; + exponent[1] = d; + exponent[2] = c; + exponent[3] = b; + exponent[4] = a; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (exponent[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (exponent[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 p[64]; for (u32 i = 0; i < 64; i++) p[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 p[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx 
+ 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, p, m, fact); // r = (r * pre[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_m04 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void 
m29200_m08 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m29200_m16 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m29200_s04 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m29200_s08 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200s (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m29200_s16 (KERN_ATTR_ESALT (radmin3_t)) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m29200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} diff --git a/OpenCL/m29200_a3-pure.cl b/OpenCL/m29200_a3-pure.cl new file mode 100644 index 000000000..61ec43964 --- /dev/null +++ b/OpenCL/m29200_a3-pure.cl @@ -0,0 +1,493 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + +//#define NEW_SIMD_CODE + 
+#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha1.cl) +#include M2S(INCLUDE_PATH/inc_bignum_operations.cl) +#include M2S(INCLUDE_PATH/inc_radmin3_constants.h) +#endif + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +KERNEL_FQ void m29200_mxx (KERN_ATTR_VECTOR_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + + // ctx1 with main salt + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + + // add password to the user name (and colon, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_utf16le_swap (&c0, w, pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + 
sha1_ctx_t c1 = ctx1; + + u32 w0_t[4] = { 0 }; + u32 w1_t[4] = { 0 }; + u32 w2_t[4] = { 0 }; + u32 w3_t[4] = { 0 }; + + w0_t[0] = c0.h[0]; + w0_t[1] = c0.h[1]; + w0_t[2] = c0.h[2]; + w0_t[3] = c0.h[3]; + w1_t[0] = c0.h[4]; + + sha1_update_64 (&c1, w0_t, w1_t, w2_t, w3_t, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for (u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx 
+ 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m29200_sxx (KERN_ATTR_VECTOR_ESALT (radmin3_t)) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + + /** + * cache constant values to shared memory + */ + + LOCAL_VK u32 m[64]; + LOCAL_VK u32 r[64]; + LOCAL_VK u32 fact[64]; + + for (u32 i = lid; i < 64; i += lsz) + { + m[i] = RADMIN3_M[i]; + r[i] = RADMIN3_R[i]; + fact[i] = RADMIN3_FACT[i]; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + + // ctx0 with user + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].user, esalt_bufs[DIGESTS_OFFSET_HOST].user_len); + + + // ctx1 with main salt + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_global (&ctx1, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + + // add password to the user name (and colon, included): + + sha1_ctx_t c0 = ctx0; + + sha1_update_utf16le_swap (&c0, w, pw_len); + + sha1_final (&c0); + + + // add first SHA1 result to main salt: + + sha1_ctx_t c1 = ctx1; + + u32 w0_t[4] = { 0 }; + u32 w1_t[4] = { 0 }; + u32 w2_t[4] = { 0 }; + u32 w3_t[4] = { 0 }; + + w0_t[0] = c0.h[0]; + w0_t[1] = c0.h[1]; + w0_t[2] = c0.h[2]; + w0_t[3] = c0.h[3]; + w1_t[0] = c0.h[4]; + + sha1_update_64 (&c1, w0_t, w1_t, w2_t, w3_t, 20); + + sha1_final (&c1); + + const u32 e[5] = { c1.h[4], c1.h[3], c1.h[2], c1.h[1], c1.h[0] }; + + // u32 r_t[64]; for (u32 i = 0; i < 64; i++) r_t[i] = r[i]; + + u32 r_t[64] = + { + r[ 0], r[ 1], r[ 2], r[ 3], r[ 4], r[ 5], r[ 6], r[ 7], + r[ 8], r[ 9], r[10], r[11], r[12], r[13], r[14], r[15], + r[16], r[17], r[18], r[19], r[20], r[21], r[22], r[23], + r[24], r[25], r[26], r[27], r[28], r[29], r[30], r[31], + r[32], r[33], r[34], r[35], r[36], r[37], r[38], r[39], + r[40], r[41], r[42], r[43], r[44], r[45], r[46], r[47], + r[48], r[49], r[50], r[51], r[52], r[53], r[54], r[55], + r[56], 
r[57], r[58], r[59], r[60], r[61], r[62], r[63], + }; + + + // main loop over the SHA1 result/vector e[]: + + for (u32 i = 0, j = 0; i < PRECOMP_SLOTS; i += 1, j += PRECOMP_ENTRIES - 1) + { + const u32 div = (PRECOMP_BITS * i) / 32; // for 4 bits: (i / 8) + const u32 shift = (PRECOMP_BITS * i) % 32; // for 4 bits: (i % 8) * 4 + + // const + u32 cur_sel = (e[div] >> shift) & PRECOMP_MASK; // 0x0f == 0b1111 (4 bits) + + // working with non-divisible u32 (see PRECOMP_BITS): + + if (32 - shift < PRECOMP_BITS) + { + cur_sel |= (e[div + 1] << (32 - shift)) & PRECOMP_MASK; + } + + if (cur_sel == 0) continue; + + const u32 pre_idx = (j + cur_sel - 1) * PRECOMP_ENTRYLEN; // x * 64 is same as x << 6 + + // u32 pre[64]; for (u32 i = 0; i < 64; i++) pre[i] = esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + i]; + + const u32 pre[64] = + { + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 0], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 1], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 2], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 3], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 4], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 5], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 6], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 7], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 8], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 9], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 10], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 11], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 12], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 13], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 14], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 15], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 16], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 17], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 18], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 19], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 20], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 21], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 22], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 23], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 24], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 25], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 26], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 27], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 28], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 29], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 30], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 31], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 32], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 33], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 34], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 35], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 36], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 37], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 38], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 39], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 40], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 41], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 42], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 43], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 44], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 45], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 46], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 47], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 48], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 49], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 50], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 51], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 52], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 53], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 54], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 55], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 56], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 57], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 58], + 
esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 59], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 60], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 61], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 62], + esalt_bufs[DIGESTS_OFFSET_HOST].pre[pre_idx + 63], + }; + + mul_mod (r_t, pre, m, fact); // r_t = (r_t * RADMIN3_PRE[n]) % m + } + + const u32 r0 = r_t[0]; + const u32 r1 = r_t[1]; + const u32 r2 = r_t[2]; + const u32 r3 = r_t[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/docs/changes.txt b/docs/changes.txt index 99bb9b257..909eff97b 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -14,6 +14,7 @@ - Added hash-mode: Kerberos 5, etype 17, DB - Added hash-mode: Kerberos 5, etype 18, DB - Added hash-mode: PostgreSQL SCRAM-SHA-256 +- Added hash-mode: Radmin3 - Added hash-mode: Teamspeak 3 (channel hash) - Added hash-mode: bcrypt(sha512($pass)) / bcryptsha512 - Added hash-mode: sha1($salt.sha1(utf16le($username).':'.utf16le($pass))) diff --git a/docs/readme.txt b/docs/readme.txt index ec40b8ad9..44573abad 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -207,6 +207,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - BSDi Crypt, Extended DES - NTLM - Radmin2 +- Radmin3 - Samsung Android Password/PIN - Windows Hello PIN/Password - Windows Phone 8+ PIN/password diff --git a/include/emu_inc_bignum_operations.h b/include/emu_inc_bignum_operations.h new file mode 100644 index 000000000..56f086195 --- /dev/null +++ b/include/emu_inc_bignum_operations.h @@ -0,0 +1,14 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _EMU_INC_BIGNUM_OPERATIONS_H +#define _EMU_INC_BIGNUM_OPERATIONS_H + +#include "emu_general.h" + +#include "inc_vendor.h" +#include "inc_bignum_operations.h" + +#endif // _EMU_INC_BIGNUM_OPERATIONS_H diff --git a/include/emu_inc_radmin3_constants.h b/include/emu_inc_radmin3_constants.h new file mode 100644 index 000000000..83b6b2039 --- /dev/null +++ 
b/include/emu_inc_radmin3_constants.h @@ -0,0 +1,14 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _EMU_INC_RADMIN3_CONSTANTS_H +#define _EMU_INC_RADMIN3_CONSTANTS_H + +#include "emu_general.h" + +#include "inc_vendor.h" +#include "inc_radmin3_constants.h" + +#endif // _EMU_INC_RADMIN3_CONSTANTS_H diff --git a/include/inc_radmin3_constants_pre.data b/include/inc_radmin3_constants_pre.data new file mode 100644 index 000000000..73bf89e5a Binary files /dev/null and b/include/inc_radmin3_constants_pre.data differ diff --git a/src/Makefile b/src/Makefile index 19e1216ca..ab518c6fd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -394,7 +394,7 @@ CXXFLAGS := EMU_OBJS_ALL := emu_general emu_inc_common emu_inc_platform emu_inc_scalar emu_inc_simd EMU_OBJS_ALL += emu_inc_rp emu_inc_rp_optimized -EMU_OBJS_ALL += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1 +EMU_OBJS_ALL += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1 emu_inc_bignum_operations EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb 
usage user_options wordlist $(EMU_OBJS_ALL) diff --git a/src/emu_inc_bignum_operations.c b/src/emu_inc_bignum_operations.c new file mode 100644 index 000000000..ca78c2480 --- /dev/null +++ b/src/emu_inc_bignum_operations.c @@ -0,0 +1,12 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "types.h" +#include "common.h" +#include "emu_general.h" + +#include "inc_vendor.h" +#include "inc_platform.h" +#include "inc_bignum_operations.cl" diff --git a/src/modules/module_29200.c b/src/modules/module_29200.c new file mode 100644 index 000000000..48ca6c63d --- /dev/null +++ b/src/modules/module_29200.c @@ -0,0 +1,349 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + * This algorithm for password-storage for the Radmin 3 software was analyzed and made public by synacktiv: + * https://www.synacktiv.com/publications/cracking-radmin-server-3-passwords.html + */ + + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" +#include "emu_inc_bignum_operations.h" +#include "emu_inc_radmin3_constants.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_NETWORK_PROTOCOL; +static const char *HASH_NAME = "Radmin3"; +static const u64 KERN_TYPE = 29200; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_NOT_ITERATED; +static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_PT_GENERATE_BE + | OPTS_TYPE_PT_ADD80 + | OPTS_TYPE_PT_ADDBITS15 + | OPTS_TYPE_PT_UTF16LE + | OPTS_TYPE_ST_HEX; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = 
"$radmin3$75007300650072006e0061006d006500*c63bf695069d564844c4849e7df6d41f1fbc5f3a7d8fe27c5f20545a238398fa*0062fb848c21d606baa0a91d7177daceb69ad2f6d090c2f1b3a654cfb417be66f739ae952f5c7c5170743459daf854a22684787b24f8725337b3c3bd1e0f2a6285768ceccca77f26c579d42a66372df7782b2eefccb028a0efb51a4257dd0804d05e0a83f611f2a0f10ffe920568cc7af1ec426f450ec99ade1f2a4905fd319f8c190c2db0b0e24627d635bc2b4a2c4c9ae956b1e02784c9ce958eb9883c60ba8ea2731dd0e515f492c44f39324e4027587c1330f14216e17f212eaec949273797ae74497782ee8b6f640dd2d124c59db8c37724c8a5a63bad005f8e491b459ff1b92f861ab6d99a2548cb8902b0840c7f20a108ede6bf9a60093053781216fe"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t 
*user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +static const char *SIGNATURE_RADMIN3 = "$radmin3$"; + +typedef struct radmin3 +{ + u32 user[64]; + u32 user_len; + + u32 pre[PRECOMP_DATALEN]; // 38400 for PRECOMP_BITS = 4 + +} radmin3_t; + +// trick to include binary data file in assembly: +// credits go to http://elm-chan.org/junk/32bit/binclude.html#inc_c + +#define INCLUDE_BIN(name, file) asm \ +( \ + ".section .rodata \n" \ + ".balign 4 \n" \ + ".global " name " \n" \ + name ": \n" \ + ".incbin \"" file "\" \n" \ + ".section .text \n" \ +) + +INCLUDE_BIN ("RADMIN3_PRE", 
"include/inc_radmin3_constants_pre.data"); + +#undef INCLUDE_BIN + +extern const u32 RADMIN3_PRE[PRECOMP_DATALEN]; + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (radmin3_t); + + return esalt_size; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + radmin3_t *esalt = (radmin3_t *) esalt_buf; + + hc_token_t token; + + token.token_cnt = 4; + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_RADMIN3; + + token.len[0] = 9; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + // user name + token.sep[1] = '*'; + token.len_min[1] = (SALT_MIN * 2); + token.len_max[1] = (SALT_MAX * 2) - 1; // we store the colon (:) in esalt->user[] + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH; + + // SHA1 salt + token.sep[2] = '*'; + token.len_min[2] = 64; + token.len_max[2] = 64; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // verifier + token.len[3] = 512; + token.attr[3] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + + // user name: + + if ((token.len[1] % 2) != 0) return (PARSER_SALT_LENGTH); + + u8 *u = (u8 *) esalt->user; + + hex_decode (token.buf[1], token.len[1], u); + + esalt->user_len = token.len[1] / 2; + + u[esalt->user_len] = ':'; + + esalt->user_len++; + + // call byte_swap () to avoid it in the kernel: + + for (u32 i = 0; i < 64; i++) + { + esalt->user[i] = byte_swap_32 (esalt->user[i]); + } + + + // 
salt (for salted SHA1): + + if ((token.len[2] % 2) != 0) return (PARSER_SALT_LENGTH); + + u8 *s = (u8 *) salt->salt_buf; + + hex_decode (token.buf[2], token.len[2], s); + + salt->salt_len = token.len[2] / 2; + + // call byte_swap () to avoid it in the kernel: + + for (u32 i = 0; i < 64; i++) + { + salt->salt_buf[i] = byte_swap_32 (salt->salt_buf[i]); + } + + + // verifier: + + if ((token.len[3] % 2) != 0) return (PARSER_SALT_LENGTH); + + u8 *v = (u8 *) salt->salt_buf_pc; + + hex_decode (token.buf[3], token.len[3], v); + + // change the order (byte_swap () and v[63] <-> v[0], v[62] <-> v[1] etc): + + for (u32 i = 0, j = 63; i < 32; i++, j--) + { + u32 t1 = salt->salt_buf_pc[i]; + u32 t2 = salt->salt_buf_pc[j]; + + salt->salt_buf_pc[j] = byte_swap_32 (t1); + salt->salt_buf_pc[i] = byte_swap_32 (t2); + } + + + // digest + + // convert our verifier to Montgomery form (s.t. we avoid converting it back on GPU): + + u32 dgst[64] = { 0 }; + + to_montgomery (dgst, salt->salt_buf_pc, RADMIN3_M); + + digest[0] = dgst[0]; + digest[1] = dgst[1]; + digest[2] = dgst[2]; + digest[3] = dgst[3]; + + + /* + * pre-computed values for BigNum exponentiation: + */ + + memcpy (esalt->pre, RADMIN3_PRE, sizeof (RADMIN3_PRE)); + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + radmin3_t *esalt = (radmin3_t *) esalt_buf; + + u8 *out_buf = (u8 *) line_buf; + + // signature + + int out_len = snprintf (line_buf, line_size, "%s", SIGNATURE_RADMIN3); + + // user + + u32 u[64]; + + for (u32 i = 0; i < 64; i++) + { + u[i] = byte_swap_32 (esalt->user[i]); + } + + out_len += generic_salt_encode (hashconfig, (const u8 *) u, (const int) esalt->user_len - 1, out_buf + out_len); // -1 because of the ':' optimization + + 
out_buf[out_len] = '*'; + + out_len++; + + // salt + + u32 s[64]; + + for (u32 i = 0; i < 64; i++) + { + s[i] = byte_swap_32 (salt->salt_buf[i]); + } + + out_len += generic_salt_encode (hashconfig, (const u8 *) s, (const int) salt->salt_len, out_buf + out_len); + + out_buf[out_len] = '*'; + + out_len++; + + // verifier + + u32 verifier[64]; + + for (u32 i = 0, j = 63; i < 32; i++, j--) // fix the order + { + u32 t1 = salt->salt_buf_pc[i]; + u32 t2 = salt->salt_buf_pc[j]; + + verifier[j] = byte_swap_32 (t1); + verifier[i] = byte_swap_32 (t2); + } + + out_len += generic_salt_encode (hashconfig, (const u8 *) verifier, 256, out_buf + out_len); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + 
module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = 
MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/radmin3_to_hashcat.pl b/tools/radmin3_to_hashcat.pl new file mode 100644 index 000000000..e096fbd7e --- /dev/null +++ b/tools/radmin3_to_hashcat.pl @@ -0,0 +1,289 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +# for this hashcat extraction tool the input should be a export/dump of the registry key +# [HKEY_LOCAL_MACHINE\SOFTWARE\WOW6432Node\Radmin\v3.0\Server\Parameters\Radmin Security\1] +# +# "reg export" cmd command can be used for this: +# reg export "HKEY_LOCAL_MACHINE\SOFTWARE\WOW6432Node\Radmin\v3.0\Server\Parameters\Radmin Security\1" radmin3_export.reg +# +# Note: this tool is intentionally not designed to do an automatic registry key read +# but this could be done easily also in software/perl: +# use Win32::TieRegistry (Delimiter => '/'); +# my $reg_key = $Registry->{'HKEY_LOCAL_MACHINE/SOFTWARE/WOW6432Node/Radmin/v3.0/Server/Parameters/Radmin Security'}; +# my $file_content = $reg_key->{'/1'}; +# +# An example input file (first command line parameter): +# +# [HKEY_LOCAL_MACHINE\SOFTWARE\WOW6432Node\Radmin\v3.0\Server\Parameters\Radmin Security\1] +# 
"1"=hex:10,00,00,0a,72,00,6f,00,67,00,65,00,72,00,30,00,01,00,98,47,fc,7e,0f,\ +# 89,1d,fd,5d,02,f1,9d,58,7d,8f,77,ae,c0,b9,80,d4,30,4b,01,13,b4,06,f2,3e,2c,\ +# ec,58,ca,fc,a0,4a,53,e3,6f,b6,8e,0c,3b,ff,92,cf,33,57,86,b0,db,e6,0d,fe,41,\ +# 78,ef,2f,cd,2a,4d,d0,99,47,ff,d8,df,96,fd,0f,9e,29,81,a3,2d,a9,55,03,34,2e,\ +# ca,9f,08,06,2c,bd,d4,ac,2d,7c,df,81,0d,b4,db,96,db,70,10,22,66,26,1c,d3,f8,\ +# bd,d5,6a,10,2f,c6,ce,ed,bb,a5,ea,e9,9e,61,27,bd,d9,52,f7,a0,d1,8a,79,02,1c,\ +# 88,1a,e6,3e,c4,b3,59,03,87,f5,48,59,8f,2c,b8,f9,0d,ea,36,fc,4f,80,c5,47,3f,\ +# db,6b,0c,6b,db,0f,db,af,46,01,f5,60,dd,14,91,67,ea,12,5d,b8,ad,34,fd,0f,d4,\ +# 53,50,de,c7,2c,fb,3b,52,8b,a2,33,2d,60,91,ac,ea,89,df,d0,6c,9c,4d,18,f6,97,\ +# 24,5b,d2,ac,92,78,b9,2b,fe,7d,ba,fa,a0,c4,3b,40,a7,1f,19,30,eb,c4,fd,24,c9,\ +# e5,a2,e5,a4,cc,f5,d7,f5,15,44,d7,0b,2b,ca,4a,f5,b8,d3,7b,37,9f,d7,74,0a,68,\ +# 2f,40,00,00,01,05,50,00,00,20,f9,89,48,2b,a8,3b,63,45,fd,1d,d7,e2,13,13,dc,\ +# d5,55,22,ba,57,15,b5,79,ea,b8,74,d7,64,33,92,8d,72,60,00,01,00,01,2a,1b,fd,\ +# 53,4a,88,d9,19,40,70,e6,1e,76,07,fd,69,90,94,ea,b6,3b,53,b2,76,6b,0c,f3,5e,\ +# 73,fb,cc,21,41,ae,d3,28,1f,64,ca,62,0b,27,95,1c,f5,e2,c2,78,60,37,54,27,5f,\ +# c1,63,51,ee,f0,8f,bb,e3,0c,f5,d9,27,be,c5,61,e5,ea,98,a6,df,a1,ee,e9,00,4b,\ +# 00,83,4f,d9,ca,d5,ae,59,1e,ef,4f,c8,8b,f9,73,75,04,d2,9e,c5,93,34,6c,cd,1d,\ +# 76,18,82,37,73,8e,0b,6e,8a,f8,47,ef,4a,74,a9,a4,d9,df,04,8d,5d,6b,f2,19,c7,\ +# ab,f5,40,72,00,c3,5d,3c,dc,d5,e7,e2,c6,51,fe,0d,77,bc,60,41,e1,51,96,46,f5,\ +# 8b,1c,cc,a2,11,1a,37,25,86,6b,be,2b,60,4f,9d,17,2f,28,53,9a,97,5d,1d,0f,99,\ +# 7e,4c,d2,8c,49,7f,ad,62,a7,90,e7,35,2f,19,40,1e,fb,7d,7f,b6,ba,cb,85,e0,67,\ +# 4e,ab,03,1d,78,2f,a0,e7,3d,8e,b4,b4,0a,c6,ee,cc,a8,d9,87,fd,b9,0c,c1,01,54,\ +# a5,39,6a,26,7c,69,cb,47,68,c3,a6,43,59,12,bb,b6,0d,68,91,d2,1b,de,bc,da,0f,\ +# 0a,b5,20,00,00,04,ff,01,00,00 + +use strict; +use warnings; +use utf8; + + +# +# Constants: +# + +my $REGISTRY_PREFIX = "=hex:"; + +my $ENTRY_KEY_USER = 16; 
+my $ENTRY_KEY_MODULUS = 48; +my $ENTRY_KEY_GENERATOR = 64; +my $ENTRY_KEY_SALT = 80; +my $ENTRY_KEY_VERIFIER = 96; + +my $HARD_CODED_GENERATOR = "05"; +my $HARD_CODED_MODULUS = "9847fc7e0f891dfd5d02f19d587d8f77aec0b980d4304b0113b406f23e2cec58cafca04a53e36fb68e0c3bff92cf335786b0dbe60dfe4178ef2fcd2a4dd09947ffd8df96fd0f9e2981a32da95503342eca9f08062cbdd4ac2d7cdf810db4db96db70102266261cd3f8bdd56a102fc6ceedbba5eae99e6127bdd952f7a0d18a79021c881ae63ec4b3590387f548598f2cb8f90dea36fc4f80c5473fdb6b0c6bdb0fdbaf4601f560dd149167ea125db8ad34fd0fd45350dec72cfb3b528ba2332d6091acea89dfd06c9c4d18f697245bd2ac9278b92bfe7dbafaa0c43b40a71f1930ebc4fd24c9e5a2e5a4ccf5d7f51544d70b2bca4af5b8d37b379fd7740a682f"; + + +# +# Start: +# + +if (scalar (@ARGV) < 1) +{ + print STDERR "Usage:\n" . $0 . " \n\n"; + print STDERR "Please specify the Radmin 3 registry export file as command line parameter\n\n"; + print STDERR "The registry key is something like:\n"; + print STDERR "HKEY_LOCAL_MACHINE\\SOFTWARE\\WOW6432Node\\Radmin\\v3.0\\Server\\Parameters\\Radmin Security\\1\n"; + + exit (1); +} + +my $file_name = $ARGV[0]; + +my $fh; + +if (! 
open ($fh, "<", $file_name)) +{ + print STDERR "ERROR: Could not open the registry dump file '$file_name'\n"; + + exit (1); +} + +binmode ($fh); + +my $file_content = ""; + +{ + local $/ = undef; + + $file_content = <$fh>; +} + +close ($fh); + + +if (length ($file_content) < 5 + 0) # replace 0 with minimum expected length +{ + print STDERR "ERROR: File size of file '$file_name' is invalid\n"; + + exit (1); +} + +$file_content =~ s/[\x00]//g; # this could be true if UTF16 + BOM are being used + +my $prefix_idx = index ($file_content, $REGISTRY_PREFIX); + +if ($prefix_idx < 0) +{ + print STDERR "ERROR: Could not find the key '=hex:' within the file content\n"; + + exit (1); +} + +$file_content = substr ($file_content, $prefix_idx + length ($REGISTRY_PREFIX)); + +# $file_content =~ s/[ \r\n,\\]//g; + +# we could also remove every character that is not an hexadecimal symbol: +$file_content =~ s/[^0-9a-fA-F]//g; + +$file_content = pack ("H*", $file_content); + + +# final length check (needed ?): + +my $file_content_len = length ($file_content); + +if ($file_content_len < 2 + 1 + 2 + 1 + 2 + 32 + 2 + 256 + 2 + 256) # replace with min length +{ + print STDERR "ERROR: File content of file '$file_name' is too short\n"; + + exit (1); +} + + +# loop over the data: + +my $user = ""; +my $salt = ""; +my $verifier = ""; + +my $found_user = 0; +my $found_modulus = 0; +my $found_generator = 0; +my $found_salt = 0; +my $found_verifier = 0; + +for (my $i = 0; $i < $file_content_len; $i += 4) +{ + if ($i + 4 > $file_content_len) + { + print STDERR "ERROR: Unexpected EOF (end of file) in file '$file_name'\n"; + + exit (1); + } + + my $type = ord (substr ($file_content, $i + 1, 1)) * 256 + + ord (substr ($file_content, $i + 0, 1)); + my $len = ord (substr ($file_content, $i + 2, 1)) * 256 + + ord (substr ($file_content, $i + 3, 1)); + + my $pos = $i + 4; + + $i += $len; + + # we are not interested in other values than what we need: + + if (($type != $ENTRY_KEY_USER) && + ($type != 
$ENTRY_KEY_MODULUS) && + ($type != $ENTRY_KEY_GENERATOR) && + ($type != $ENTRY_KEY_SALT) && + ($type != $ENTRY_KEY_VERIFIER)) + { + next; + } + + if ($i > $file_content_len) + { + print STDERR "ERROR: Unexpected EOF (end of file) in file '$file_name'\n"; + + exit (1); + } + + + # + # get the data, finally: + # + + my $value = substr ($file_content, $pos, $len); + + $value = unpack ("H*", $value); + + if ($type == $ENTRY_KEY_USER) + { + $user = $value; + + $found_user = 1; + } + elsif ($type == $ENTRY_KEY_MODULUS) + { + if ($value ne $HARD_CODED_MODULUS) + { + print STDERR "ERROR: Non-default modulus found in file '$file_name'\n"; + + exit (1); + } + + $found_modulus = 1; + } + elsif ($type == $ENTRY_KEY_GENERATOR) + { + if ($value ne $HARD_CODED_GENERATOR) + { + print STDERR "ERROR: Non-default generator found in file '$file_name'\n"; + + exit (1); + } + + $found_generator = 1; + } + elsif ($type == $ENTRY_KEY_SALT) + { + $salt = $value; + + $found_salt = 1; + } + elsif ($type == $ENTRY_KEY_VERIFIER) + { + $verifier = $value; + + $found_verifier = 1; + } +} + +if ($found_user == 0) +{ + print STDERR "ERROR: No user name found in file '$file_name'\n"; + + exit (1); +} + +if ($found_modulus == 0) +{ + print STDERR "ERROR: No modulus found in file '$file_name'\n"; + + exit (1); +} + +if ($found_generator == 0) +{ + print STDERR "ERROR: No generator found in file '$file_name'\n"; + + exit (1); +} + +if ($found_salt == 0) +{ + print STDERR "ERROR: No salt found in file '$file_name'\n"; + + exit (1); +} + +if ($found_verifier == 0) +{ + print STDERR "ERROR: No verifier found in file '$file_name'\n"; + + exit (1); +} + + +# +# Output: +# + +print sprintf ("\$radmin3\$%s*%s*%s\n", + $user, + $salt, + $verifier); diff --git a/tools/test_modules/m29200.pm b/tools/test_modules/m29200.pm new file mode 100644 index 000000000..78d70f836 --- /dev/null +++ b/tools/test_modules/m29200.pm @@ -0,0 +1,92 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## 
License.....: MIT +## + +use strict; +use warnings; + +use Digest::SHA qw (sha1 sha1_hex); +use Crypt::OpenSSL::Bignum::CTX; +use Encode; + +sub module_constraints { [[0, 256], [32, 32], [-1, -1], [-1, -1], [-1, -1]] } + +my $GENERATOR = "05"; +my $MODULUS = "9847fc7e0f891dfd5d02f19d587d8f77aec0b980d4304b0113b406f23e2cec58cafca04a53e36fb68e0c3bff92cf335786b0dbe60dfe4178ef2fcd2a4dd09947ffd8df96fd0f9e2981a32da95503342eca9f08062cbdd4ac2d7cdf810db4db96db70102266261cd3f8bdd56a102fc6ceedbba5eae99e6127bdd952f7a0d18a79021c881ae63ec4b3590387f548598f2cb8f90dea36fc4f80c5473fdb6b0c6bdb0fdbaf4601f560dd149167ea125db8ad34fd0fd45350dec72cfb3b528ba2332d6091acea89dfd06c9c4d18f697245bd2ac9278b92bfe7dbafaa0c43b40a71f1930ebc4fd24c9e5a2e5a4ccf5d7f51544d70b2bca4af5b8d37b379fd7740a682f"; + +sub module_generate_hash +{ + my $word = shift; + my $salt = shift; + my $user = shift; + + if (! defined ($user)) + { + $user = random_mixedcase_string (int (rand (128))); + + $user = encode ('UTF16-LE', $user); + } + + my $word_utf16 = encode ("UTF-16LE", $word); + + my $exponent = sha1_hex ($salt . sha1 ($user . ":" . $word_utf16)); + + my $g = Crypt::OpenSSL::Bignum->new_from_hex ($GENERATOR); + my $m = Crypt::OpenSSL::Bignum->new_from_hex ($MODULUS); + my $e = Crypt::OpenSSL::Bignum->new_from_hex ($exponent); + + my $ctx = Crypt::OpenSSL::Bignum::CTX->new (); + + my $pow = $g->mod_exp ($e, $m, $ctx); + + my $res = $pow->to_bin (); + + # IMPORTANT step: + + $res = "\x00" x (256 - length ($res)) . 
$res; # pad it to exactly 256 bytes + + + my $hash = sprintf ("\$radmin3\$%s*%s*%s", unpack ("H*", $user), unpack ("H*", $salt), unpack ("H*", $res)); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my $idx = index ($line, ':'); + + return unless $idx >= 0; + + my $hash = substr ($line, 0, $idx); + my $word = substr ($line, $idx + 1); + + return unless substr ($hash, 0, 9) eq '$radmin3$'; + + my ($user, $salt, $verifier) = split ('\*', substr ($hash, 9)); + + return unless defined $user; + return unless defined $salt; + return unless defined $verifier; + + return unless length ($salt) == 64; + + return unless $user =~ m/^[0-9a-fA-F]*$/; + return unless $salt =~ m/^[0-9a-fA-F]*$/; + return unless $verifier =~ m/^[0-9a-fA-F]*$/; + + $salt = pack ("H*", $salt); + $user = pack ("H*", $user); + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, $salt, $user); + + return ($new_hash, $word); +} + +1;