Change bitsliced kernels from 3d to 2d invocation mode for slightly better performance

pull/2315/head
Jens Steube 4 years ago
parent 6b8f0da8e9
commit f96e35649d

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1903,78 +1903,6 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
u32 D62 = d62;
u32 D63 = d63;
/**
* digest
*/
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
/**
* base
*/
@ -2043,230 +1971,227 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
u32 k03 = K03;
u32 k04 = K04;
u32 k05 = K05;
u32 k06 = K06;
u32 k07 = K07;
u32 k08 = K08;
u32 k09 = K09;
u32 k10 = K10;
u32 k11 = K11;
u32 k12 = K12;
u32 k13 = K13;
u32 k14 = K14;
u32 k15 = K15;
u32 k16 = K16;
u32 k17 = K17;
u32 k18 = K18;
u32 k19 = K19;
u32 k20 = K20;
u32 k21 = K21;
u32 k22 = K22;
u32 k23 = K23;
u32 k24 = K24;
u32 k25 = K25;
u32 k26 = K26;
u32 k27 = K27;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
k03 |= words_buf_s[pc_pos].b[ 3];
k04 |= words_buf_s[pc_pos].b[ 4];
k05 |= words_buf_s[pc_pos].b[ 5];
k06 |= words_buf_s[pc_pos].b[ 6];
k07 |= words_buf_s[pc_pos].b[ 7];
k08 |= words_buf_s[pc_pos].b[ 8];
k09 |= words_buf_s[pc_pos].b[ 9];
k10 |= words_buf_s[pc_pos].b[10];
k11 |= words_buf_s[pc_pos].b[11];
k12 |= words_buf_s[pc_pos].b[12];
k13 |= words_buf_s[pc_pos].b[13];
k14 |= words_buf_s[pc_pos].b[14];
k15 |= words_buf_s[pc_pos].b[15];
k16 |= words_buf_s[pc_pos].b[16];
k17 |= words_buf_s[pc_pos].b[17];
k18 |= words_buf_s[pc_pos].b[18];
k19 |= words_buf_s[pc_pos].b[19];
k20 |= words_buf_s[pc_pos].b[20];
k21 |= words_buf_s[pc_pos].b[21];
k22 |= words_buf_s[pc_pos].b[22];
k23 |= words_buf_s[pc_pos].b[23];
k24 |= words_buf_s[pc_pos].b[24];
k25 |= words_buf_s[pc_pos].b[25];
k26 |= words_buf_s[pc_pos].b[26];
k27 |= words_buf_s[pc_pos].b[27];
DES
(
k00, k01, k02, k03, k04, k05, k06,
k07, k08, k09, k10, k11, k12, k13,
k14, k15, k16, k17, k18, k19, k20,
k21, k22, k23, k24, k25, k26, k27,
K28, K29, K30, K31, K32, K33, K34,
K35, K36, K37, K38, K39, K40, K41,
K42, K43, K44, K45, K46, K47, K48,
K49, K50, K51, K52, K53, K54, K55,
&D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
&D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
&D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
&D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
&D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
&D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
&D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
&D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
);
u32 out[64];
out[ 0] = D00;
out[ 1] = D01;
out[ 2] = D02;
out[ 3] = D03;
out[ 4] = D04;
out[ 5] = D05;
out[ 6] = D06;
out[ 7] = D07;
out[ 8] = D08;
out[ 9] = D09;
out[10] = D10;
out[11] = D11;
out[12] = D12;
out[13] = D13;
out[14] = D14;
out[15] = D15;
out[16] = D16;
out[17] = D17;
out[18] = D18;
out[19] = D19;
out[20] = D20;
out[21] = D21;
out[22] = D22;
out[23] = D23;
out[24] = D24;
out[25] = D25;
out[26] = D26;
out[27] = D27;
out[28] = D28;
out[29] = D29;
out[30] = D30;
out[31] = D31;
out[32] = D32;
out[33] = D33;
out[34] = D34;
out[35] = D35;
out[36] = D36;
out[37] = D37;
out[38] = D38;
out[39] = D39;
out[40] = D40;
out[41] = D41;
out[42] = D42;
out[43] = D43;
out[44] = D44;
out[45] = D45;
out[46] = D46;
out[47] = D47;
out[48] = D48;
out[49] = D49;
out[50] = D50;
out[51] = D51;
out[52] = D52;
out[53] = D53;
out[54] = D54;
out[55] = D55;
out[56] = D56;
out[57] = D57;
out[58] = D58;
out[59] = D59;
out[60] = D60;
out[61] = D61;
out[62] = D62;
out[63] = D63;
if (digests_cnt < 16)
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
for (u32 d = 0; d < digests_cnt; d++)
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
u32 k03 = K03;
u32 k04 = K04;
u32 k05 = K05;
u32 k06 = K06;
u32 k07 = K07;
u32 k08 = K08;
u32 k09 = K09;
u32 k10 = K10;
u32 k11 = K11;
u32 k12 = K12;
u32 k13 = K13;
u32 k14 = K14;
u32 k15 = K15;
u32 k16 = K16;
u32 k17 = K17;
u32 k18 = K18;
u32 k19 = K19;
u32 k20 = K20;
u32 k21 = K21;
u32 k22 = K22;
u32 k23 = K23;
u32 k24 = K24;
u32 k25 = K25;
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
k03 |= words_buf_s[pc_pos].b[ 3];
k04 |= words_buf_s[pc_pos].b[ 4];
k05 |= words_buf_s[pc_pos].b[ 5];
k06 |= words_buf_s[pc_pos].b[ 6];
k07 |= words_buf_s[pc_pos].b[ 7];
k08 |= words_buf_s[pc_pos].b[ 8];
k09 |= words_buf_s[pc_pos].b[ 9];
k10 |= words_buf_s[pc_pos].b[10];
k11 |= words_buf_s[pc_pos].b[11];
k12 |= words_buf_s[pc_pos].b[12];
k13 |= words_buf_s[pc_pos].b[13];
k14 |= words_buf_s[pc_pos].b[14];
k15 |= words_buf_s[pc_pos].b[15];
k16 |= words_buf_s[pc_pos].b[16];
k17 |= words_buf_s[pc_pos].b[17];
k18 |= words_buf_s[pc_pos].b[18];
k19 |= words_buf_s[pc_pos].b[19];
k20 |= words_buf_s[pc_pos].b[20];
k21 |= words_buf_s[pc_pos].b[21];
k22 |= words_buf_s[pc_pos].b[22];
k23 |= words_buf_s[pc_pos].b[23];
k24 |= words_buf_s[pc_pos].b[24];
k25 |= words_buf_s[pc_pos].b[25];
k26 |= words_buf_s[pc_pos].b[26];
k27 |= words_buf_s[pc_pos].b[27];
DES
(
k00, k01, k02, k03, k04, k05, k06,
k07, k08, k09, k10, k11, k12, k13,
k14, k15, k16, k17, k18, k19, k20,
k21, k22, k23, k24, k25, k26, k27,
K28, K29, K30, K31, K32, K33, K34,
K35, K36, K37, K38, K39, K40, K41,
K42, K43, K44, K45, K46, K47, K48,
K49, K50, K51, K52, K53, K54, K55,
&D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
&D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
&D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
&D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
&D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
&D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
&D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
&D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
);
u32 out[64];
out[ 0] = D00;
out[ 1] = D01;
out[ 2] = D02;
out[ 3] = D03;
out[ 4] = D04;
out[ 5] = D05;
out[ 6] = D06;
out[ 7] = D07;
out[ 8] = D08;
out[ 9] = D09;
out[10] = D10;
out[11] = D11;
out[12] = D12;
out[13] = D13;
out[14] = D14;
out[15] = D15;
out[16] = D16;
out[17] = D17;
out[18] = D18;
out[19] = D19;
out[20] = D20;
out[21] = D21;
out[22] = D22;
out[23] = D23;
out[24] = D24;
out[25] = D25;
out[26] = D26;
out[27] = D27;
out[28] = D28;
out[29] = D29;
out[30] = D30;
out[31] = D31;
out[32] = D32;
out[33] = D33;
out[34] = D34;
out[35] = D35;
out[36] = D36;
out[37] = D37;
out[38] = D38;
out[39] = D39;
out[40] = D40;
out[41] = D41;
out[42] = D42;
out[43] = D43;
out[44] = D44;
out[45] = D45;
out[46] = D46;
out[47] = D47;
out[48] = D48;
out[49] = D49;
out[50] = D50;
out[51] = D51;
out[52] = D52;
out[53] = D53;
out[54] = D54;
out[55] = D55;
out[56] = D56;
out[57] = D57;
out[58] = D58;
out[59] = D59;
out[60] = D60;
out[61] = D61;
out[62] = D62;
out[63] = D63;
if (digests_cnt < 16)
{
const u32 final_hash_pos = digests_offset + d;
for (u32 d = 0; d < digests_cnt; d++)
{
const u32 final_hash_pos = digests_offset + d;
if (hashes_shown[final_hash_pos]) continue;
if (hashes_shown[final_hash_pos]) continue;
u32 search[2];
u32 search[2];
search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
u32 tmpResult = 0;
u32 tmpResult = 0;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
const u32 b0 = -((search[0] >> i) & 1);
const u32 b1 = -((search[1] >> i) & 1);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
const u32 b0 = -((search[0] >> i) & 1);
const u32 b1 = -((search[1] >> i) & 1);
tmpResult |= out[ 0 + i] ^ b0;
tmpResult |= out[32 + i] ^ b1;
}
tmpResult |= out[ 0 + i] ^ b0;
tmpResult |= out[32 + i] ^ b1;
}
if (tmpResult == 0xffffffff) continue;
if (tmpResult == 0xffffffff) continue;
const u32 slice = ffz (tmpResult);
const u32 slice = ffz (tmpResult);
const u32 r0 = search[0];
const u32 r1 = search[1];
const u32 r2 = 0;
const u32 r3 = 0;
const u32 r0 = search[0];
const u32 r1 = search[1];
const u32 r2 = 0;
const u32 r3 = 0;
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}
}
}
else
{
u32 out0[32];
u32 out1[32];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
else
{
out0[i] = out[ 0 + i];
out1[i] = out[32 + i];
}
u32 out0[32];
u32 out1[32];
transpose32c (out0);
transpose32c (out1);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + i];
out1[i] = out[32 + i];
}
#ifdef _unroll
#pragma unroll
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[slice];
const u32 r1 = out1[slice];
const u32 r2 = 0;
const u32 r3 = 0;
transpose32c (out0);
transpose32c (out1);
#ifdef KERNEL_STATIC
#include COMPARE_M
#ifdef _unroll
#pragma unroll
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[slice];
const u32 r1 = out1[slice];
const u32 r2 = 0;
const u32 r3 = 0;
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}
}
}
}
@ -2426,70 +2351,70 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0);
const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0);
const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0);
const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0);
const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0);
const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0);
const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0);
const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0);
const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0);
const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0);
const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0);
const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0);
const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0);
const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0);
const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0);
const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0);
const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0);
const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0);
const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0);
const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0);
const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0);
const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0);
const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0);
const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0);
const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0);
const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0);
const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0);
const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0);
const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0);
const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0);
const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0);
const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0);
const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0);
const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0);
const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0);
const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0);
const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0);
const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0);
const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0);
const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0);
const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0);
const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0);
const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0);
const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0);
const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0);
const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0);
const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0);
const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0);
const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0);
const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0);
const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0);
const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0);
const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0);
const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0);
const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0);
const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0);
const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0);
const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0);
const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0);
const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0);
const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0);
const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0);
const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0);
const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0);
/**
* base
@ -2559,173 +2484,170 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
u32 k03 = K03;
u32 k04 = K04;
u32 k05 = K05;
u32 k06 = K06;
u32 k07 = K07;
u32 k08 = K08;
u32 k09 = K09;
u32 k10 = K10;
u32 k11 = K11;
u32 k12 = K12;
u32 k13 = K13;
u32 k14 = K14;
u32 k15 = K15;
u32 k16 = K16;
u32 k17 = K17;
u32 k18 = K18;
u32 k19 = K19;
u32 k20 = K20;
u32 k21 = K21;
u32 k22 = K22;
u32 k23 = K23;
u32 k24 = K24;
u32 k25 = K25;
u32 k26 = K26;
u32 k27 = K27;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
k03 |= words_buf_s[pc_pos].b[ 3];
k04 |= words_buf_s[pc_pos].b[ 4];
k05 |= words_buf_s[pc_pos].b[ 5];
k06 |= words_buf_s[pc_pos].b[ 6];
k07 |= words_buf_s[pc_pos].b[ 7];
k08 |= words_buf_s[pc_pos].b[ 8];
k09 |= words_buf_s[pc_pos].b[ 9];
k10 |= words_buf_s[pc_pos].b[10];
k11 |= words_buf_s[pc_pos].b[11];
k12 |= words_buf_s[pc_pos].b[12];
k13 |= words_buf_s[pc_pos].b[13];
k14 |= words_buf_s[pc_pos].b[14];
k15 |= words_buf_s[pc_pos].b[15];
k16 |= words_buf_s[pc_pos].b[16];
k17 |= words_buf_s[pc_pos].b[17];
k18 |= words_buf_s[pc_pos].b[18];
k19 |= words_buf_s[pc_pos].b[19];
k20 |= words_buf_s[pc_pos].b[20];
k21 |= words_buf_s[pc_pos].b[21];
k22 |= words_buf_s[pc_pos].b[22];
k23 |= words_buf_s[pc_pos].b[23];
k24 |= words_buf_s[pc_pos].b[24];
k25 |= words_buf_s[pc_pos].b[25];
k26 |= words_buf_s[pc_pos].b[26];
k27 |= words_buf_s[pc_pos].b[27];
DES
(
k00, k01, k02, k03, k04, k05, k06,
k07, k08, k09, k10, k11, k12, k13,
k14, k15, k16, k17, k18, k19, k20,
k21, k22, k23, k24, k25, k26, k27,
K28, K29, K30, K31, K32, K33, K34,
K35, K36, K37, K38, K39, K40, K41,
K42, K43, K44, K45, K46, K47, K48,
K49, K50, K51, K52, K53, K54, K55,
&D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
&D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
&D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
&D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
&D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
&D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
&D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
&D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
);
u32 tmpResult = 0;
tmpResult |= D00 ^ S00;
tmpResult |= D01 ^ S01;
tmpResult |= D02 ^ S02;
tmpResult |= D03 ^ S03;
tmpResult |= D04 ^ S04;
tmpResult |= D05 ^ S05;
tmpResult |= D06 ^ S06;
tmpResult |= D07 ^ S07;
tmpResult |= D08 ^ S08;
tmpResult |= D09 ^ S09;
tmpResult |= D10 ^ S10;
tmpResult |= D11 ^ S11;
tmpResult |= D12 ^ S12;
tmpResult |= D13 ^ S13;
tmpResult |= D14 ^ S14;
tmpResult |= D15 ^ S15;
if (tmpResult == 0xffffffff) return;
tmpResult |= D16 ^ S16;
tmpResult |= D17 ^ S17;
tmpResult |= D18 ^ S18;
tmpResult |= D19 ^ S19;
tmpResult |= D20 ^ S20;
tmpResult |= D21 ^ S21;
tmpResult |= D22 ^ S22;
tmpResult |= D23 ^ S23;
tmpResult |= D24 ^ S24;
tmpResult |= D25 ^ S25;
tmpResult |= D26 ^ S26;
tmpResult |= D27 ^ S27;
tmpResult |= D28 ^ S28;
tmpResult |= D29 ^ S29;
tmpResult |= D30 ^ S30;
tmpResult |= D31 ^ S31;
if (tmpResult == 0xffffffff) return;
tmpResult |= D32 ^ S32;
tmpResult |= D33 ^ S33;
tmpResult |= D34 ^ S34;
tmpResult |= D35 ^ S35;
tmpResult |= D36 ^ S36;
tmpResult |= D37 ^ S37;
tmpResult |= D38 ^ S38;
tmpResult |= D39 ^ S39;
tmpResult |= D40 ^ S40;
tmpResult |= D41 ^ S41;
tmpResult |= D42 ^ S42;
tmpResult |= D43 ^ S43;
tmpResult |= D44 ^ S44;
tmpResult |= D45 ^ S45;
tmpResult |= D46 ^ S46;
tmpResult |= D47 ^ S47;
if (tmpResult == 0xffffffff) return;
tmpResult |= D48 ^ S48;
tmpResult |= D49 ^ S49;
tmpResult |= D50 ^ S50;
tmpResult |= D51 ^ S51;
tmpResult |= D52 ^ S52;
tmpResult |= D53 ^ S53;
tmpResult |= D54 ^ S54;
tmpResult |= D55 ^ S55;
tmpResult |= D56 ^ S56;
tmpResult |= D57 ^ S57;
tmpResult |= D58 ^ S58;
tmpResult |= D59 ^ S59;
tmpResult |= D60 ^ S60;
tmpResult |= D61 ^ S61;
tmpResult |= D62 ^ S62;
tmpResult |= D63 ^ S63;
if (tmpResult == 0xffffffff) return;
const u32 slice = ffz (tmpResult);
#ifdef KERNEL_STATIC
#include COMPARE_S
#endif
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
u32 k03 = K03;
u32 k04 = K04;
u32 k05 = K05;
u32 k06 = K06;
u32 k07 = K07;
u32 k08 = K08;
u32 k09 = K09;
u32 k10 = K10;
u32 k11 = K11;
u32 k12 = K12;
u32 k13 = K13;
u32 k14 = K14;
u32 k15 = K15;
u32 k16 = K16;
u32 k17 = K17;
u32 k18 = K18;
u32 k19 = K19;
u32 k20 = K20;
u32 k21 = K21;
u32 k22 = K22;
u32 k23 = K23;
u32 k24 = K24;
u32 k25 = K25;
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
k03 |= words_buf_s[pc_pos].b[ 3];
k04 |= words_buf_s[pc_pos].b[ 4];
k05 |= words_buf_s[pc_pos].b[ 5];
k06 |= words_buf_s[pc_pos].b[ 6];
k07 |= words_buf_s[pc_pos].b[ 7];
k08 |= words_buf_s[pc_pos].b[ 8];
k09 |= words_buf_s[pc_pos].b[ 9];
k10 |= words_buf_s[pc_pos].b[10];
k11 |= words_buf_s[pc_pos].b[11];
k12 |= words_buf_s[pc_pos].b[12];
k13 |= words_buf_s[pc_pos].b[13];
k14 |= words_buf_s[pc_pos].b[14];
k15 |= words_buf_s[pc_pos].b[15];
k16 |= words_buf_s[pc_pos].b[16];
k17 |= words_buf_s[pc_pos].b[17];
k18 |= words_buf_s[pc_pos].b[18];
k19 |= words_buf_s[pc_pos].b[19];
k20 |= words_buf_s[pc_pos].b[20];
k21 |= words_buf_s[pc_pos].b[21];
k22 |= words_buf_s[pc_pos].b[22];
k23 |= words_buf_s[pc_pos].b[23];
k24 |= words_buf_s[pc_pos].b[24];
k25 |= words_buf_s[pc_pos].b[25];
k26 |= words_buf_s[pc_pos].b[26];
k27 |= words_buf_s[pc_pos].b[27];
DES
(
k00, k01, k02, k03, k04, k05, k06,
k07, k08, k09, k10, k11, k12, k13,
k14, k15, k16, k17, k18, k19, k20,
k21, k22, k23, k24, k25, k26, k27,
K28, K29, K30, K31, K32, K33, K34,
K35, K36, K37, K38, K39, K40, K41,
K42, K43, K44, K45, K46, K47, K48,
K49, K50, K51, K52, K53, K54, K55,
&D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
&D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
&D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
&D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
&D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
&D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
&D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
&D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
);
u32 tmpResult = 0;
tmpResult |= D00 ^ S00;
tmpResult |= D01 ^ S01;
tmpResult |= D02 ^ S02;
tmpResult |= D03 ^ S03;
tmpResult |= D04 ^ S04;
tmpResult |= D05 ^ S05;
tmpResult |= D06 ^ S06;
tmpResult |= D07 ^ S07;
tmpResult |= D08 ^ S08;
tmpResult |= D09 ^ S09;
tmpResult |= D10 ^ S10;
tmpResult |= D11 ^ S11;
tmpResult |= D12 ^ S12;
tmpResult |= D13 ^ S13;
tmpResult |= D14 ^ S14;
tmpResult |= D15 ^ S15;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D16 ^ S16;
tmpResult |= D17 ^ S17;
tmpResult |= D18 ^ S18;
tmpResult |= D19 ^ S19;
tmpResult |= D20 ^ S20;
tmpResult |= D21 ^ S21;
tmpResult |= D22 ^ S22;
tmpResult |= D23 ^ S23;
tmpResult |= D24 ^ S24;
tmpResult |= D25 ^ S25;
tmpResult |= D26 ^ S26;
tmpResult |= D27 ^ S27;
tmpResult |= D28 ^ S28;
tmpResult |= D29 ^ S29;
tmpResult |= D30 ^ S30;
tmpResult |= D31 ^ S31;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D32 ^ S32;
tmpResult |= D33 ^ S33;
tmpResult |= D34 ^ S34;
tmpResult |= D35 ^ S35;
tmpResult |= D36 ^ S36;
tmpResult |= D37 ^ S37;
tmpResult |= D38 ^ S38;
tmpResult |= D39 ^ S39;
tmpResult |= D40 ^ S40;
tmpResult |= D41 ^ S41;
tmpResult |= D42 ^ S42;
tmpResult |= D43 ^ S43;
tmpResult |= D44 ^ S44;
tmpResult |= D45 ^ S45;
tmpResult |= D46 ^ S46;
tmpResult |= D47 ^ S47;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D48 ^ S48;
tmpResult |= D49 ^ S49;
tmpResult |= D50 ^ S50;
tmpResult |= D51 ^ S51;
tmpResult |= D52 ^ S52;
tmpResult |= D53 ^ S53;
tmpResult |= D54 ^ S54;
tmpResult |= D55 ^ S55;
tmpResult |= D56 ^ S56;
tmpResult |= D57 ^ S57;
tmpResult |= D58 ^ S58;
tmpResult |= D59 ^ S59;
tmpResult |= D60 ^ S60;
tmpResult |= D61 ^ S61;
tmpResult |= D62 ^ S62;
tmpResult |= D63 ^ S63;
if (tmpResult == 0xffffffff) continue;
const u32 slice = ffz (tmpResult);
#ifdef KERNEL_STATIC
#include COMPARE_S
#endif
}
}

@ -392,40 +392,40 @@ typedef enum opts_type
OPTS_TYPE_PT_GENERATE_LE = (1ULL << 9),
OPTS_TYPE_PT_GENERATE_BE = (1ULL << 10),
OPTS_TYPE_PT_NEVERCRACK = (1ULL << 11), // if we want all possible results
OPTS_TYPE_PT_BITSLICE = (1ULL << 12),
OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 13),
OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 14),
OPTS_TYPE_PT_LM = (1ULL << 15), // special handling: all lower, 7 max, ...
OPTS_TYPE_PT_HEX = (1ULL << 16), // input wordlist (and masks!) are always in hex
OPTS_TYPE_ST_UTF16LE = (1ULL << 17),
OPTS_TYPE_ST_UTF16BE = (1ULL << 18),
OPTS_TYPE_ST_UPPER = (1ULL << 19),
OPTS_TYPE_ST_LOWER = (1ULL << 20),
OPTS_TYPE_ST_ADD01 = (1ULL << 21),
OPTS_TYPE_ST_ADD02 = (1ULL << 22),
OPTS_TYPE_ST_ADD80 = (1ULL << 23),
OPTS_TYPE_ST_ADDBITS14 = (1ULL << 24),
OPTS_TYPE_ST_ADDBITS15 = (1ULL << 25),
OPTS_TYPE_ST_HEX = (1ULL << 26),
OPTS_TYPE_ST_BASE64 = (1ULL << 27),
OPTS_TYPE_ST_HASH_MD5 = (1ULL << 28),
OPTS_TYPE_HASH_COPY = (1ULL << 29),
OPTS_TYPE_HASH_SPLIT = (1ULL << 30),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 31), // a kernel which is called each time normal _loop kernel finished.
OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 12),
OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 13),
OPTS_TYPE_PT_LM = (1ULL << 14), // special handling: all lower, 7 max, ...
OPTS_TYPE_PT_HEX = (1ULL << 15), // input wordlist (and masks!) are always in hex
OPTS_TYPE_ST_UTF16LE = (1ULL << 16),
OPTS_TYPE_ST_UTF16BE = (1ULL << 17),
OPTS_TYPE_ST_UPPER = (1ULL << 18),
OPTS_TYPE_ST_LOWER = (1ULL << 19),
OPTS_TYPE_ST_ADD01 = (1ULL << 20),
OPTS_TYPE_ST_ADD02 = (1ULL << 21),
OPTS_TYPE_ST_ADD80 = (1ULL << 22),
OPTS_TYPE_ST_ADDBITS14 = (1ULL << 23),
OPTS_TYPE_ST_ADDBITS15 = (1ULL << 24),
OPTS_TYPE_ST_HEX = (1ULL << 25),
OPTS_TYPE_ST_BASE64 = (1ULL << 26),
OPTS_TYPE_ST_HASH_MD5 = (1ULL << 27),
OPTS_TYPE_HASH_COPY = (1ULL << 28),
OPTS_TYPE_HASH_SPLIT = (1ULL << 29),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 30), // a kernel which is called each time normal _loop kernel finished.
// but unlike a hook kernel this kernel is called for every _loop iteration offset
OPTS_TYPE_HOOK12 = (1ULL << 32),
OPTS_TYPE_HOOK23 = (1ULL << 33),
OPTS_TYPE_INIT2 = (1ULL << 34),
OPTS_TYPE_LOOP2 = (1ULL << 35),
OPTS_TYPE_AUX1 = (1ULL << 36),
OPTS_TYPE_AUX2 = (1ULL << 37),
OPTS_TYPE_AUX3 = (1ULL << 38),
OPTS_TYPE_AUX4 = (1ULL << 39),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 40),
OPTS_TYPE_PREFERED_THREAD = (1ULL << 41), // some algorithms (complicated ones with many branches) benefit from this
OPTS_TYPE_PT_ADD06 = (1ULL << 42),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 43),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 44), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_HOOK12 = (1ULL << 31),
OPTS_TYPE_HOOK23 = (1ULL << 32),
OPTS_TYPE_INIT2 = (1ULL << 33),
OPTS_TYPE_LOOP2 = (1ULL << 34),
OPTS_TYPE_AUX1 = (1ULL << 35),
OPTS_TYPE_AUX2 = (1ULL << 36),
OPTS_TYPE_AUX3 = (1ULL << 37),
OPTS_TYPE_AUX4 = (1ULL << 38),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 39),
OPTS_TYPE_PREFERED_THREAD = (1ULL << 40), // some algorithms (complicated ones with many branches) benefit from this
OPTS_TYPE_PT_ADD06 = (1ULL << 41),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 42),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 43), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 44),
OPTS_TYPE_SUGGEST_KG = (1ULL << 45), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 46), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 47), // sometimes the password should not be printed to potfile

@ -2868,7 +2868,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
else
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
const u32 size_tm = device_param->size_tm;
@ -3430,7 +3430,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
const status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
const user_options_t *user_options = hashcat_ctx->user_options;
u64 kernel_threads = 0;
u64 dynamic_shared_mem = 0;
@ -3544,44 +3543,33 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
num_elements = CEILDIV (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
if (kern_run == KERN_RUN_1)
{
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
}
else
{
if (kern_run == KERN_RUN_1)
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
else if (kern_run == KERN_RUN_2)
}
else if (kern_run == KERN_RUN_2)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
else if (kern_run == KERN_RUN_3)
}
else if (kern_run == KERN_RUN_3)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
}
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
}
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
@ -3651,44 +3639,34 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
cl_event opencl_event;
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
}
else
if (kern_run == KERN_RUN_1)
{
if (kern_run == KERN_RUN_1)
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
else if (kern_run == KERN_RUN_2)
}
else if (kern_run == KERN_RUN_2)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
else if (kern_run == KERN_RUN_3)
}
else if (kern_run == KERN_RUN_3)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
}
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
}
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
@ -8839,7 +8817,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
@ -9092,10 +9070,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (user_options->attack_mode == ATTACK_MODE_BF)
{
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
@ -9422,7 +9403,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
@ -9670,10 +9651,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]) == -1) return -1;
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)

@ -22,7 +22,7 @@ static const char *HASH_NAME = "descrypt, DES (Unix), Traditional DES";
static const u64 KERN_TYPE = 1500;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_BITSLICE;
| OPTS_TYPE_TM_KERNEL;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = NULL; // the self-test can't work because the salt is not part of the code at compile-time
static const char *ST_HASH = "8133vc.5rieNk";
@ -73,6 +73,13 @@ int module_build_plain_postprocess (MAYBE_UNUSED const hashconfig_t *hashconfig,
return src_len;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 64; // performance only optimization
return kernel_threads_max;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
u32 kernel_loops_max = KERNEL_LOOPS_MAX;
@ -301,7 +308,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

@ -23,7 +23,7 @@ static const u64 KERN_TYPE = 3000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_UPPER
| OPTS_TYPE_PT_BITSLICE
| OPTS_TYPE_TM_KERNEL
| OPTS_TYPE_PT_ALWAYS_ASCII
| OPTS_TYPE_PT_LM
| OPTS_TYPE_HASH_SPLIT;

@ -22,7 +22,7 @@ static const char *HASH_NAME = "DES (PT = $salt, key = $pass)";
static const u64 KERN_TYPE = 14000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_BITSLICE
| OPTS_TYPE_TM_KERNEL
| OPTS_TYPE_ST_HEX;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";

@ -201,7 +201,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
device_param->kernel_params_buf32[30] = 1;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
pw_t pw;

Loading…
Cancel
Save