1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-22 14:48:12 +00:00

Optimize SM3 for use on platforms that support native bitselect()

This commit is contained in:
jsteube 2023-02-24 21:56:55 +00:00
parent 6e45d4dafc
commit ea6173b307
2 changed files with 241 additions and 1 deletions

View File

@ -15,8 +15,13 @@
/*
 * SM3 boolean helper functions.
 *
 * SM3_FF0 / SM3_GG0: three-way XOR of x, y and z.
 * SM3_FF1:           bitwise majority of x, y and z.
 * SM3_GG1:           bitwise choose, i.e. for every bit (x ? y : z).
 *
 * SM3_FF1 and SM3_GG1 are defined exactly once, inside the USE_BITSELECT
 * conditional below. When USE_BITSELECT is defined they map onto bitselect ()
 * (bitselect (a, b, c) takes bits from b where c is set and from a otherwise);
 * otherwise an equivalent AND/OR/XOR formulation is used.
 */
#define SM3_FF0(x, y, z) ((x) ^ (y) ^ (z))
#define SM3_GG0(x, y, z) ((x) ^ (y) ^ (z))

#ifdef USE_BITSELECT
/* majority: where x and z agree the result is x, otherwise it is y */
#define SM3_FF1(x, y, z) (bitselect ((x), (y), ((x) ^ (z))))
/* choose: take y where x is set, z where x is clear */
#define SM3_GG1(x, y, z) (bitselect ((z), (y), (x)))
#else
#define SM3_FF1(x, y, z) (((x) & (y)) | ((z) & ((x) ^ (y))))
#define SM3_GG1(x, y, z) (((z) ^ ((x) & ((y) ^ (z)))))
#endif

/*
 * Message expansion step, scalar (_S) and vector flavour:
 *   P1 (a ^ b ^ rotl (c, 15)) ^ rotl (d, 7) ^ e
 * Every argument is parenthesized so that callers may pass arbitrary
 * expressions without operator-precedence surprises.
 */
#define SM3_EXPAND_S(a, b, c, d, e) (SM3_P1_S ((a) ^ (b) ^ hc_rotl32_S ((c), 15)) ^ hc_rotl32_S ((d), 7) ^ (e))
#define SM3_EXPAND(a, b, c, d, e) (SM3_P1 ((a) ^ (b) ^ hc_rotl32 ((c), 15)) ^ hc_rotl32 ((d), 7) ^ (e))

View File

@ -263,6 +263,241 @@ DECLSPEC void m31100s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO
}
}
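/* Expansion-phase optimization, kept for reference only: the message expansion is
   computed once with word 0 treated as zero, and the contribution of the actual
   word 0 is then XORed in for each candidate via the fix* values. For some reason
   this is slower than the current implementation above, probably because of how
   the compiler's optimizer handles it.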
DECLSPEC void m31100s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
{
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
const u32 d_rev = hc_rotr32_S (search[0], 9);
u32 pre_t[68];
pre_t[ 0] = 0;
pre_t[ 1] = w[ 1];
pre_t[ 2] = w[ 2];
pre_t[ 3] = w[ 3];
pre_t[ 4] = w[ 4];
pre_t[ 5] = w[ 5];
pre_t[ 6] = w[ 6];
pre_t[ 7] = w[ 7];
pre_t[ 8] = w[ 8];
pre_t[ 9] = w[ 9];
pre_t[10] = w[10];
pre_t[11] = w[11];
pre_t[12] = w[12];
pre_t[13] = w[13];
pre_t[14] = w[14];
pre_t[15] = w[15];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 68; i++)
{
pre_t[i] = SM3_EXPAND_S (pre_t[i - 16], pre_t[i - 9], pre_t[i - 3], pre_t[i - 13], pre_t[i - 6]);
}
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
u32x t[68];
t[0] = w0;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 1; i < 65; i++)
{
t[i] = pre_t[i];
}
const u32x fix16 = SM3_EXPAND ( w0, 0, 0, 0, 0);
const u32x fix19 = SM3_EXPAND ( 0, 0, fix16, 0, 0);
const u32x fix22 = SM3_EXPAND ( 0, 0, fix19, 0, fix16);
const u32x fix25 = SM3_EXPAND ( 0, fix16, fix22, 0, fix19);
const u32x fix28 = SM3_EXPAND ( 0, fix19, fix25, 0, fix22);
const u32x fix29 = SM3_EXPAND ( 0, 0, 0, fix16, 0);
const u32x fix31 = SM3_EXPAND ( 0, fix22, fix28, 0, fix25);
const u32x fix32 = SM3_EXPAND (fix16, 0, fix29, fix19, 0);
const u32x fix34 = SM3_EXPAND ( 0, fix25, fix31, 0, fix28);
const u32x fix35 = SM3_EXPAND (fix19, 0, fix32, fix22, fix29);
const u32x fix37 = SM3_EXPAND ( 0, fix28, fix34, 0, fix31);
const u32x fix38 = SM3_EXPAND (fix22, fix29, fix35, fix25, fix32);
const u32x fix40 = SM3_EXPAND ( 0, fix31, fix37, 0, fix34);
const u32x fix41 = SM3_EXPAND (fix25, fix32, fix38, fix28, fix35);
const u32x fix42 = SM3_EXPAND ( 0, 0, 0, fix29, 0);
const u32x fix43 = SM3_EXPAND ( 0, fix34, fix40, 0, fix37);
const u32x fix44 = SM3_EXPAND (fix28, fix35, fix41, fix31, fix38);
const u32x fix45 = SM3_EXPAND (fix29, 0, fix42, fix32, 0);
const u32x fix46 = SM3_EXPAND ( 0, fix37, fix43, 0, fix40);
const u32x fix47 = SM3_EXPAND (fix31, fix38, fix44, fix34, fix41);
const u32x fix48 = SM3_EXPAND (fix32, 0, fix45, fix35, fix42);
const u32x fix49 = SM3_EXPAND ( 0, fix40, fix46, 0, fix43);
const u32x fix50 = SM3_EXPAND (fix34, fix41, fix47, fix37, fix44);
const u32x fix51 = SM3_EXPAND (fix35, fix42, fix48, fix38, fix45);
const u32x fix52 = SM3_EXPAND ( 0, fix43, fix49, 0, fix46);
const u32x fix53 = SM3_EXPAND (fix37, fix44, fix50, fix40, fix47);
const u32x fix54 = SM3_EXPAND (fix38, fix45, fix51, fix41, fix48);
const u32x fix55 = SM3_EXPAND ( 0, fix46, fix52, fix42, fix49);
const u32x fix56 = SM3_EXPAND (fix40, fix47, fix53, fix43, fix50);
const u32x fix57 = SM3_EXPAND (fix41, fix48, fix54, fix44, fix51);
const u32x fix58 = SM3_EXPAND (fix42, fix49, fix55, fix45, fix52);
const u32x fix59 = SM3_EXPAND (fix43, fix50, fix56, fix46, fix53);
const u32x fix60 = SM3_EXPAND (fix44, fix51, fix57, fix47, fix54);
const u32x fix61 = SM3_EXPAND (fix45, fix52, fix58, fix48, fix55);
const u32x fix62 = SM3_EXPAND (fix46, fix53, fix59, fix49, fix56);
const u32x fix63 = SM3_EXPAND (fix47, fix54, fix60, fix50, fix57);
const u32x fix64 = SM3_EXPAND (fix48, fix55, fix61, fix51, fix58);
t[16] ^= fix16;
t[19] ^= fix19;
t[22] ^= fix22;
t[25] ^= fix25;
t[28] ^= fix28;
t[29] ^= fix29;
t[31] ^= fix31;
t[32] ^= fix32;
t[34] ^= fix34;
t[35] ^= fix35;
t[37] ^= fix37;
t[38] ^= fix38;
t[40] ^= fix40;
t[41] ^= fix41;
t[42] ^= fix42;
t[43] ^= fix43;
t[44] ^= fix44;
t[45] ^= fix45;
t[46] ^= fix46;
t[47] ^= fix47;
t[48] ^= fix48;
t[49] ^= fix49;
t[50] ^= fix50;
t[51] ^= fix51;
t[52] ^= fix52;
t[53] ^= fix53;
t[54] ^= fix54;
t[55] ^= fix55;
t[56] ^= fix56;
t[57] ^= fix57;
t[58] ^= fix58;
t[59] ^= fix59;
t[60] ^= fix60;
t[61] ^= fix61;
t[62] ^= fix62;
t[63] ^= fix63;
t[64] ^= fix64;
u32x a = SM3_IV_A;
u32x b = SM3_IV_B;
u32x c = SM3_IV_C;
u32x d = SM3_IV_D;
u32x e = SM3_IV_E;
u32x f = SM3_IV_F;
u32x g = SM3_IV_G;
u32x h = SM3_IV_H;
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T00, t[ 0], t[ 0] ^ t[ 4]);
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T01, t[ 1], t[ 1] ^ t[ 5]);
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T02, t[ 2], t[ 2] ^ t[ 6]);
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T03, t[ 3], t[ 3] ^ t[ 7]);
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T04, t[ 4], t[ 4] ^ t[ 8]);
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T05, t[ 5], t[ 5] ^ t[ 9]);
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T06, t[ 6], t[ 6] ^ t[10]);
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T07, t[ 7], t[ 7] ^ t[11]);
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T08, t[ 8], t[ 8] ^ t[12]);
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T09, t[ 9], t[ 9] ^ t[13]);
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T10, t[10], t[10] ^ t[14]);
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T11, t[11], t[11] ^ t[15]);
SM3_ROUND1 (a, b, c, d, e, f, g, h, SM3_T12, t[12], t[12] ^ t[16]);
SM3_ROUND1 (d, a, b, c, h, e, f, g, SM3_T13, t[13], t[13] ^ t[17]);
SM3_ROUND1 (c, d, a, b, g, h, e, f, SM3_T14, t[14], t[14] ^ t[18]);
SM3_ROUND1 (b, c, d, a, f, g, h, e, SM3_T15, t[15], t[15] ^ t[19]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T16, t[16], t[16] ^ t[20]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T17, t[17], t[17] ^ t[21]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T18, t[18], t[18] ^ t[22]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T19, t[19], t[19] ^ t[23]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T20, t[20], t[20] ^ t[24]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T21, t[21], t[21] ^ t[25]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T22, t[22], t[22] ^ t[26]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T23, t[23], t[23] ^ t[27]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T24, t[24], t[24] ^ t[28]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T25, t[25], t[25] ^ t[29]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T26, t[26], t[26] ^ t[30]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T27, t[27], t[27] ^ t[31]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T28, t[28], t[28] ^ t[32]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T29, t[29], t[29] ^ t[33]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T30, t[30], t[30] ^ t[34]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T31, t[31], t[31] ^ t[35]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T32, t[32], t[32] ^ t[36]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T33, t[33], t[33] ^ t[37]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T34, t[34], t[34] ^ t[38]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T35, t[35], t[35] ^ t[39]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T36, t[36], t[36] ^ t[40]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T37, t[37], t[37] ^ t[41]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T38, t[38], t[38] ^ t[42]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T39, t[39], t[39] ^ t[43]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T40, t[40], t[40] ^ t[44]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T41, t[41], t[41] ^ t[45]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T42, t[42], t[42] ^ t[46]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T43, t[43], t[43] ^ t[47]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T44, t[44], t[44] ^ t[48]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T45, t[45], t[45] ^ t[49]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T46, t[46], t[46] ^ t[50]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T47, t[47], t[47] ^ t[51]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T48, t[48], t[48] ^ t[52]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T49, t[49], t[49] ^ t[53]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T50, t[50], t[50] ^ t[54]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T51, t[51], t[51] ^ t[55]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T52, t[52], t[52] ^ t[56]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T53, t[53], t[53] ^ t[57]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T54, t[54], t[54] ^ t[58]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T55, t[55], t[55] ^ t[59]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T56, t[56], t[56] ^ t[60]);
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T57, t[57], t[57] ^ t[61]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T58, t[58], t[58] ^ t[62]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T59, t[59], t[59] ^ t[63]);
SM3_ROUND2 (a, b, c, d, e, f, g, h, SM3_T60, t[60], t[60] ^ t[64]);
if (MATCHES_NONE_VS (d, d_rev)) continue;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 65; i < 68; i++)
{
t[i] = pre_t[i];
}
const u32x fix65 = SM3_EXPAND (fix49, fix56, fix62, fix52, fix59);
const u32x fix66 = SM3_EXPAND (fix50, fix57, fix63, fix53, fix60);
const u32x fix67 = SM3_EXPAND (fix51, fix58, fix64, fix54, fix61);
t[65] ^= fix65;
t[66] ^= fix66;
t[67] ^= fix67;
SM3_ROUND2 (d, a, b, c, h, e, f, g, SM3_T61, t[61], t[61] ^ t[65]);
SM3_ROUND2 (c, d, a, b, g, h, e, f, SM3_T62, t[62], t[62] ^ t[66]);
SM3_ROUND2 (b, c, d, a, f, g, h, e, SM3_T63, t[63], t[63] ^ t[67]);
COMPARE_S_SIMD (d, h, c, g);
}
}
*/
KERNEL_FQ void m31100_m04 (KERN_ATTR_VECTOR ())
{
/**