1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-01 12:28:24 +00:00

Fix broken -m 1500 and -m 3000 in -a 3 mode

This commit is contained in:
Jens Steube 2016-03-06 14:24:21 +01:00
parent 2cbae341f4
commit 55cadb6834
4 changed files with 256 additions and 346 deletions

View File

@ -1922,65 +1922,71 @@ static void m01500m (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_
const u32 salt = salt_bufs[salt_pos].salt_buf[0]; const u32 salt = salt_bufs[salt_pos].salt_buf[0];
/** /**
* keys * base
*/ */
const u32 K00 = pws[gid].i[ 0]; const u32 w0 = pws[gid].i[0];
const u32 K01 = pws[gid].i[ 1]; const u32 w1 = pws[gid].i[1];
const u32 K02 = pws[gid].i[ 2];
const u32 K03 = pws[gid].i[ 3]; const u32 w0s = (w0 << 1) & 0xfefefefe;
const u32 K04 = pws[gid].i[ 4]; const u32 w1s = (w1 << 1) & 0xfefefefe;
const u32 K05 = pws[gid].i[ 5];
const u32 K06 = pws[gid].i[ 6]; const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
const u32 K07 = pws[gid].i[ 7]; const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
const u32 K08 = pws[gid].i[ 8]; const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
const u32 K09 = pws[gid].i[ 9]; const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
const u32 K10 = pws[gid].i[10]; const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
const u32 K11 = pws[gid].i[11]; const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
const u32 K12 = pws[gid].i[12]; const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
const u32 K13 = pws[gid].i[13]; const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
const u32 K14 = pws[gid].i[14]; const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
const u32 K15 = pws[gid].i[15]; const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
const u32 K16 = pws[gid].i[16]; const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
const u32 K17 = pws[gid].i[17]; const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
const u32 K18 = pws[gid].i[18]; const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
const u32 K19 = pws[gid].i[19]; const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
const u32 K20 = pws[gid].i[20]; const u32 K14 = -((w0s >> (16 + 7)) & 1);
const u32 K21 = pws[gid].i[21]; const u32 K15 = -((w0s >> (16 + 6)) & 1);
const u32 K22 = pws[gid].i[22]; const u32 K16 = -((w0s >> (16 + 5)) & 1);
const u32 K23 = pws[gid].i[23]; const u32 K17 = -((w0s >> (16 + 4)) & 1);
const u32 K24 = pws[gid].i[24]; const u32 K18 = -((w0s >> (16 + 3)) & 1);
const u32 K25 = pws[gid].i[25]; const u32 K19 = -((w0s >> (16 + 2)) & 1);
const u32 K26 = pws[gid].i[26]; const u32 K20 = -((w0s >> (16 + 1)) & 1);
const u32 K27 = pws[gid].i[27]; const u32 K21 = -((w0s >> (24 + 7)) & 1);
const u32 K28 = pws[gid].i[28]; const u32 K22 = -((w0s >> (24 + 6)) & 1);
const u32 K29 = pws[gid].i[29]; const u32 K23 = -((w0s >> (24 + 5)) & 1);
const u32 K30 = pws[gid].i[30]; const u32 K24 = -((w0s >> (24 + 4)) & 1);
const u32 K31 = pws[gid].i[31]; const u32 K25 = -((w0s >> (24 + 3)) & 1);
const u32 K32 = pws[gid].i[32]; const u32 K26 = -((w0s >> (24 + 2)) & 1);
const u32 K33 = pws[gid].i[33]; const u32 K27 = -((w0s >> (24 + 1)) & 1);
const u32 K34 = pws[gid].i[34]; const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
const u32 K35 = pws[gid].i[35]; const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
const u32 K36 = pws[gid].i[36]; const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
const u32 K37 = pws[gid].i[37]; const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
const u32 K38 = pws[gid].i[38]; const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
const u32 K39 = pws[gid].i[39]; const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
const u32 K40 = pws[gid].i[40]; const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
const u32 K41 = pws[gid].i[41]; const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
const u32 K42 = pws[gid].i[42]; const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
const u32 K43 = pws[gid].i[43]; const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
const u32 K44 = pws[gid].i[44]; const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
const u32 K45 = pws[gid].i[45]; const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
const u32 K46 = pws[gid].i[46]; const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
const u32 K47 = pws[gid].i[47]; const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
const u32 K48 = pws[gid].i[48]; const u32 K42 = -((w1s >> (16 + 7)) & 1);
const u32 K49 = pws[gid].i[49]; const u32 K43 = -((w1s >> (16 + 6)) & 1);
const u32 K50 = pws[gid].i[50]; const u32 K44 = -((w1s >> (16 + 5)) & 1);
const u32 K51 = pws[gid].i[51]; const u32 K45 = -((w1s >> (16 + 4)) & 1);
const u32 K52 = pws[gid].i[52]; const u32 K46 = -((w1s >> (16 + 3)) & 1);
const u32 K53 = pws[gid].i[53]; const u32 K47 = -((w1s >> (16 + 2)) & 1);
const u32 K54 = pws[gid].i[54]; const u32 K48 = -((w1s >> (16 + 1)) & 1);
const u32 K55 = pws[gid].i[55]; const u32 K49 = -((w1s >> (24 + 7)) & 1);
const u32 K50 = -((w1s >> (24 + 6)) & 1);
const u32 K51 = -((w1s >> (24 + 5)) & 1);
const u32 K52 = -((w1s >> (24 + 4)) & 1);
const u32 K53 = -((w1s >> (24 + 3)) & 1);
const u32 K54 = -((w1s >> (24 + 2)) & 1);
const u32 K55 = -((w1s >> (24 + 1)) & 1);
/** /**
* inner loop * inner loop
@ -2351,65 +2357,71 @@ static void m01500s (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_
#define S63 s_S[63] #define S63 s_S[63]
/** /**
* keys * base
*/ */
const u32 K00 = pws[gid].i[ 0]; const u32 w0 = pws[gid].i[0];
const u32 K01 = pws[gid].i[ 1]; const u32 w1 = pws[gid].i[1];
const u32 K02 = pws[gid].i[ 2];
const u32 K03 = pws[gid].i[ 3]; const u32 w0s = (w0 << 1) & 0xfefefefe;
const u32 K04 = pws[gid].i[ 4]; const u32 w1s = (w1 << 1) & 0xfefefefe;
const u32 K05 = pws[gid].i[ 5];
const u32 K06 = pws[gid].i[ 6]; const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
const u32 K07 = pws[gid].i[ 7]; const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
const u32 K08 = pws[gid].i[ 8]; const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
const u32 K09 = pws[gid].i[ 9]; const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
const u32 K10 = pws[gid].i[10]; const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
const u32 K11 = pws[gid].i[11]; const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
const u32 K12 = pws[gid].i[12]; const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
const u32 K13 = pws[gid].i[13]; const u32 K07 = -((w0s >> ( 8 + 7)) & 1);
const u32 K14 = pws[gid].i[14]; const u32 K08 = -((w0s >> ( 8 + 6)) & 1);
const u32 K15 = pws[gid].i[15]; const u32 K09 = -((w0s >> ( 8 + 5)) & 1);
const u32 K16 = pws[gid].i[16]; const u32 K10 = -((w0s >> ( 8 + 4)) & 1);
const u32 K17 = pws[gid].i[17]; const u32 K11 = -((w0s >> ( 8 + 3)) & 1);
const u32 K18 = pws[gid].i[18]; const u32 K12 = -((w0s >> ( 8 + 2)) & 1);
const u32 K19 = pws[gid].i[19]; const u32 K13 = -((w0s >> ( 8 + 1)) & 1);
const u32 K20 = pws[gid].i[20]; const u32 K14 = -((w0s >> (16 + 7)) & 1);
const u32 K21 = pws[gid].i[21]; const u32 K15 = -((w0s >> (16 + 6)) & 1);
const u32 K22 = pws[gid].i[22]; const u32 K16 = -((w0s >> (16 + 5)) & 1);
const u32 K23 = pws[gid].i[23]; const u32 K17 = -((w0s >> (16 + 4)) & 1);
const u32 K24 = pws[gid].i[24]; const u32 K18 = -((w0s >> (16 + 3)) & 1);
const u32 K25 = pws[gid].i[25]; const u32 K19 = -((w0s >> (16 + 2)) & 1);
const u32 K26 = pws[gid].i[26]; const u32 K20 = -((w0s >> (16 + 1)) & 1);
const u32 K27 = pws[gid].i[27]; const u32 K21 = -((w0s >> (24 + 7)) & 1);
const u32 K28 = pws[gid].i[28]; const u32 K22 = -((w0s >> (24 + 6)) & 1);
const u32 K29 = pws[gid].i[29]; const u32 K23 = -((w0s >> (24 + 5)) & 1);
const u32 K30 = pws[gid].i[30]; const u32 K24 = -((w0s >> (24 + 4)) & 1);
const u32 K31 = pws[gid].i[31]; const u32 K25 = -((w0s >> (24 + 3)) & 1);
const u32 K32 = pws[gid].i[32]; const u32 K26 = -((w0s >> (24 + 2)) & 1);
const u32 K33 = pws[gid].i[33]; const u32 K27 = -((w0s >> (24 + 1)) & 1);
const u32 K34 = pws[gid].i[34]; const u32 K28 = -((w1s >> ( 0 + 7)) & 1);
const u32 K35 = pws[gid].i[35]; const u32 K29 = -((w1s >> ( 0 + 6)) & 1);
const u32 K36 = pws[gid].i[36]; const u32 K30 = -((w1s >> ( 0 + 5)) & 1);
const u32 K37 = pws[gid].i[37]; const u32 K31 = -((w1s >> ( 0 + 4)) & 1);
const u32 K38 = pws[gid].i[38]; const u32 K32 = -((w1s >> ( 0 + 3)) & 1);
const u32 K39 = pws[gid].i[39]; const u32 K33 = -((w1s >> ( 0 + 2)) & 1);
const u32 K40 = pws[gid].i[40]; const u32 K34 = -((w1s >> ( 0 + 1)) & 1);
const u32 K41 = pws[gid].i[41]; const u32 K35 = -((w1s >> ( 8 + 7)) & 1);
const u32 K42 = pws[gid].i[42]; const u32 K36 = -((w1s >> ( 8 + 6)) & 1);
const u32 K43 = pws[gid].i[43]; const u32 K37 = -((w1s >> ( 8 + 5)) & 1);
const u32 K44 = pws[gid].i[44]; const u32 K38 = -((w1s >> ( 8 + 4)) & 1);
const u32 K45 = pws[gid].i[45]; const u32 K39 = -((w1s >> ( 8 + 3)) & 1);
const u32 K46 = pws[gid].i[46]; const u32 K40 = -((w1s >> ( 8 + 2)) & 1);
const u32 K47 = pws[gid].i[47]; const u32 K41 = -((w1s >> ( 8 + 1)) & 1);
const u32 K48 = pws[gid].i[48]; const u32 K42 = -((w1s >> (16 + 7)) & 1);
const u32 K49 = pws[gid].i[49]; const u32 K43 = -((w1s >> (16 + 6)) & 1);
const u32 K50 = pws[gid].i[50]; const u32 K44 = -((w1s >> (16 + 5)) & 1);
const u32 K51 = pws[gid].i[51]; const u32 K45 = -((w1s >> (16 + 4)) & 1);
const u32 K52 = pws[gid].i[52]; const u32 K46 = -((w1s >> (16 + 3)) & 1);
const u32 K53 = pws[gid].i[53]; const u32 K47 = -((w1s >> (16 + 2)) & 1);
const u32 K54 = pws[gid].i[54]; const u32 K48 = -((w1s >> (16 + 1)) & 1);
const u32 K55 = pws[gid].i[55]; const u32 K49 = -((w1s >> (24 + 7)) & 1);
const u32 K50 = -((w1s >> (24 + 6)) & 1);
const u32 K51 = -((w1s >> (24 + 5)) & 1);
const u32 K52 = -((w1s >> (24 + 4)) & 1);
const u32 K53 = -((w1s >> (24 + 3)) & 1);
const u32 K54 = -((w1s >> (24 + 2)) & 1);
const u32 K55 = -((w1s >> (24 + 1)) & 1);
/** /**
* inner loop * inner loop
@ -2641,45 +2653,9 @@ static void m01500s (__local u32 *s_S, __global pw_t *pws, __global kernel_rule_
} }
// //
// transpose bitslice base : easy because no overlapping buffers // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
// mod : attention race conditions, need different buffers for *in and *out
// //
__kernel void m01500_tb (__global pw_t *pws)
{
const u32 gid = get_global_id (0);
const u32 w0 = pws[gid].i[0];
const u32 w1 = pws[gid].i[1];
const u32 w0s = (w0 << 1) & 0xfefefefe;
const u32 w1s = (w1 << 1) & 0xfefefefe;
#pragma unroll
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
{
pws[gid].i[j + 0 + 0] = -((w0s >> (i + 7)) & 1);
pws[gid].i[j + 0 + 1] = -((w0s >> (i + 6)) & 1);
pws[gid].i[j + 0 + 2] = -((w0s >> (i + 5)) & 1);
pws[gid].i[j + 0 + 3] = -((w0s >> (i + 4)) & 1);
pws[gid].i[j + 0 + 4] = -((w0s >> (i + 3)) & 1);
pws[gid].i[j + 0 + 5] = -((w0s >> (i + 2)) & 1);
pws[gid].i[j + 0 + 6] = -((w0s >> (i + 1)) & 1);
}
#pragma unroll
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
{
pws[gid].i[j + 28 + 0] = -((w1s >> (i + 7)) & 1);
pws[gid].i[j + 28 + 1] = -((w1s >> (i + 6)) & 1);
pws[gid].i[j + 28 + 2] = -((w1s >> (i + 5)) & 1);
pws[gid].i[j + 28 + 3] = -((w1s >> (i + 4)) & 1);
pws[gid].i[j + 28 + 4] = -((w1s >> (i + 3)) & 1);
pws[gid].i[j + 28 + 5] = -((w1s >> (i + 2)) & 1);
pws[gid].i[j + 28 + 6] = -((w1s >> (i + 1)) & 1);
}
}
__kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r) __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
{ {
const u32 gid = get_global_id (0); const u32 gid = get_global_id (0);

View File

@ -1754,65 +1754,72 @@ static void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __gl
const u32 lid = get_local_id (0); const u32 lid = get_local_id (0);
/** /**
* keys * base
*/ */
const u32 K00 = pws[gid].i[ 0]; const u32 w0s = pws[gid].i[0];
const u32 K01 = pws[gid].i[ 1]; const u32 w1s = pws[gid].i[1];
const u32 K02 = pws[gid].i[ 2];
const u32 K03 = pws[gid].i[ 3]; const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
const u32 K04 = pws[gid].i[ 4]; const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
const u32 K05 = pws[gid].i[ 5]; const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
const u32 K06 = pws[gid].i[ 6]; const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
const u32 K07 = pws[gid].i[ 7]; const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
const u32 K08 = pws[gid].i[ 8]; const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
const u32 K09 = pws[gid].i[ 9]; const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
const u32 K10 = pws[gid].i[10]; const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
const u32 K11 = pws[gid].i[11]; const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
const u32 K12 = pws[gid].i[12]; const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
const u32 K13 = pws[gid].i[13]; const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
const u32 K14 = pws[gid].i[14]; const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
const u32 K15 = pws[gid].i[15]; const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
const u32 K16 = pws[gid].i[16]; const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
const u32 K17 = pws[gid].i[17]; const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
const u32 K18 = pws[gid].i[18]; const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
const u32 K19 = pws[gid].i[19]; const u32 K16 = -((w0s >> (16 + 7)) & 1);
const u32 K20 = pws[gid].i[20]; const u32 K17 = -((w0s >> (16 + 6)) & 1);
const u32 K21 = pws[gid].i[21]; const u32 K18 = -((w0s >> (16 + 5)) & 1);
const u32 K22 = pws[gid].i[22]; const u32 K19 = -((w0s >> (16 + 4)) & 1);
const u32 K23 = pws[gid].i[23]; const u32 K20 = -((w0s >> (16 + 3)) & 1);
const u32 K24 = pws[gid].i[24]; const u32 K21 = -((w0s >> (16 + 2)) & 1);
const u32 K25 = pws[gid].i[25]; const u32 K22 = -((w0s >> (16 + 1)) & 1);
const u32 K26 = pws[gid].i[26]; const u32 K23 = -((w0s >> (16 + 0)) & 1);
const u32 K27 = pws[gid].i[27]; const u32 K24 = -((w0s >> (24 + 7)) & 1);
const u32 K28 = pws[gid].i[28]; const u32 K25 = -((w0s >> (24 + 6)) & 1);
const u32 K29 = pws[gid].i[29]; const u32 K26 = -((w0s >> (24 + 5)) & 1);
const u32 K30 = pws[gid].i[30]; const u32 K27 = -((w0s >> (24 + 4)) & 1);
const u32 K31 = pws[gid].i[31]; const u32 K28 = -((w0s >> (24 + 3)) & 1);
const u32 K32 = pws[gid].i[32]; const u32 K29 = -((w0s >> (24 + 2)) & 1);
const u32 K33 = pws[gid].i[33]; const u32 K30 = -((w0s >> (24 + 1)) & 1);
const u32 K34 = pws[gid].i[34]; const u32 K31 = -((w0s >> (24 + 0)) & 1);
const u32 K35 = pws[gid].i[35]; const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
const u32 K36 = pws[gid].i[36]; const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
const u32 K37 = pws[gid].i[37]; const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
const u32 K38 = pws[gid].i[38]; const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
const u32 K39 = pws[gid].i[39]; const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
const u32 K40 = pws[gid].i[40]; const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
const u32 K41 = pws[gid].i[41]; const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
const u32 K42 = pws[gid].i[42]; const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
const u32 K43 = pws[gid].i[43]; const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
const u32 K44 = pws[gid].i[44]; const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
const u32 K45 = pws[gid].i[45]; const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
const u32 K46 = pws[gid].i[46]; const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
const u32 K47 = pws[gid].i[47]; const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
const u32 K48 = pws[gid].i[48]; const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
const u32 K49 = pws[gid].i[49]; const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
const u32 K50 = pws[gid].i[50]; const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
const u32 K51 = pws[gid].i[51]; const u32 K48 = -((w1s >> (16 + 7)) & 1);
const u32 K52 = pws[gid].i[52]; const u32 K49 = -((w1s >> (16 + 6)) & 1);
const u32 K53 = pws[gid].i[53]; const u32 K50 = -((w1s >> (16 + 5)) & 1);
const u32 K54 = pws[gid].i[54]; const u32 K51 = -((w1s >> (16 + 4)) & 1);
const u32 K55 = pws[gid].i[55]; const u32 K52 = -((w1s >> (16 + 3)) & 1);
const u32 K53 = -((w1s >> (16 + 2)) & 1);
const u32 K54 = -((w1s >> (16 + 1)) & 1);
const u32 K55 = -((w1s >> (16 + 0)) & 1);
/**
* inner loop
*/
const u32 pc_pos = get_local_id (1); const u32 pc_pos = get_local_id (1);
@ -2185,65 +2192,72 @@ static void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __gl
const u32 S63 = -((s1 >> 31) & 1); const u32 S63 = -((s1 >> 31) & 1);
/** /**
* keys * base
*/ */
const u32 K00 = pws[gid].i[ 0]; const u32 w0s = pws[gid].i[0];
const u32 K01 = pws[gid].i[ 1]; const u32 w1s = pws[gid].i[1];
const u32 K02 = pws[gid].i[ 2];
const u32 K03 = pws[gid].i[ 3]; const u32 K00 = -((w0s >> ( 0 + 7)) & 1);
const u32 K04 = pws[gid].i[ 4]; const u32 K01 = -((w0s >> ( 0 + 6)) & 1);
const u32 K05 = pws[gid].i[ 5]; const u32 K02 = -((w0s >> ( 0 + 5)) & 1);
const u32 K06 = pws[gid].i[ 6]; const u32 K03 = -((w0s >> ( 0 + 4)) & 1);
const u32 K07 = pws[gid].i[ 7]; const u32 K04 = -((w0s >> ( 0 + 3)) & 1);
const u32 K08 = pws[gid].i[ 8]; const u32 K05 = -((w0s >> ( 0 + 2)) & 1);
const u32 K09 = pws[gid].i[ 9]; const u32 K06 = -((w0s >> ( 0 + 1)) & 1);
const u32 K10 = pws[gid].i[10]; const u32 K07 = -((w0s >> ( 0 + 0)) & 1);
const u32 K11 = pws[gid].i[11]; const u32 K08 = -((w0s >> ( 8 + 7)) & 1);
const u32 K12 = pws[gid].i[12]; const u32 K09 = -((w0s >> ( 8 + 6)) & 1);
const u32 K13 = pws[gid].i[13]; const u32 K10 = -((w0s >> ( 8 + 5)) & 1);
const u32 K14 = pws[gid].i[14]; const u32 K11 = -((w0s >> ( 8 + 4)) & 1);
const u32 K15 = pws[gid].i[15]; const u32 K12 = -((w0s >> ( 8 + 3)) & 1);
const u32 K16 = pws[gid].i[16]; const u32 K13 = -((w0s >> ( 8 + 2)) & 1);
const u32 K17 = pws[gid].i[17]; const u32 K14 = -((w0s >> ( 8 + 1)) & 1);
const u32 K18 = pws[gid].i[18]; const u32 K15 = -((w0s >> ( 8 + 0)) & 1);
const u32 K19 = pws[gid].i[19]; const u32 K16 = -((w0s >> (16 + 7)) & 1);
const u32 K20 = pws[gid].i[20]; const u32 K17 = -((w0s >> (16 + 6)) & 1);
const u32 K21 = pws[gid].i[21]; const u32 K18 = -((w0s >> (16 + 5)) & 1);
const u32 K22 = pws[gid].i[22]; const u32 K19 = -((w0s >> (16 + 4)) & 1);
const u32 K23 = pws[gid].i[23]; const u32 K20 = -((w0s >> (16 + 3)) & 1);
const u32 K24 = pws[gid].i[24]; const u32 K21 = -((w0s >> (16 + 2)) & 1);
const u32 K25 = pws[gid].i[25]; const u32 K22 = -((w0s >> (16 + 1)) & 1);
const u32 K26 = pws[gid].i[26]; const u32 K23 = -((w0s >> (16 + 0)) & 1);
const u32 K27 = pws[gid].i[27]; const u32 K24 = -((w0s >> (24 + 7)) & 1);
const u32 K28 = pws[gid].i[28]; const u32 K25 = -((w0s >> (24 + 6)) & 1);
const u32 K29 = pws[gid].i[29]; const u32 K26 = -((w0s >> (24 + 5)) & 1);
const u32 K30 = pws[gid].i[30]; const u32 K27 = -((w0s >> (24 + 4)) & 1);
const u32 K31 = pws[gid].i[31]; const u32 K28 = -((w0s >> (24 + 3)) & 1);
const u32 K32 = pws[gid].i[32]; const u32 K29 = -((w0s >> (24 + 2)) & 1);
const u32 K33 = pws[gid].i[33]; const u32 K30 = -((w0s >> (24 + 1)) & 1);
const u32 K34 = pws[gid].i[34]; const u32 K31 = -((w0s >> (24 + 0)) & 1);
const u32 K35 = pws[gid].i[35]; const u32 K32 = -((w1s >> ( 0 + 7)) & 1);
const u32 K36 = pws[gid].i[36]; const u32 K33 = -((w1s >> ( 0 + 6)) & 1);
const u32 K37 = pws[gid].i[37]; const u32 K34 = -((w1s >> ( 0 + 5)) & 1);
const u32 K38 = pws[gid].i[38]; const u32 K35 = -((w1s >> ( 0 + 4)) & 1);
const u32 K39 = pws[gid].i[39]; const u32 K36 = -((w1s >> ( 0 + 3)) & 1);
const u32 K40 = pws[gid].i[40]; const u32 K37 = -((w1s >> ( 0 + 2)) & 1);
const u32 K41 = pws[gid].i[41]; const u32 K38 = -((w1s >> ( 0 + 1)) & 1);
const u32 K42 = pws[gid].i[42]; const u32 K39 = -((w1s >> ( 0 + 0)) & 1);
const u32 K43 = pws[gid].i[43]; const u32 K40 = -((w1s >> ( 8 + 7)) & 1);
const u32 K44 = pws[gid].i[44]; const u32 K41 = -((w1s >> ( 8 + 6)) & 1);
const u32 K45 = pws[gid].i[45]; const u32 K42 = -((w1s >> ( 8 + 5)) & 1);
const u32 K46 = pws[gid].i[46]; const u32 K43 = -((w1s >> ( 8 + 4)) & 1);
const u32 K47 = pws[gid].i[47]; const u32 K44 = -((w1s >> ( 8 + 3)) & 1);
const u32 K48 = pws[gid].i[48]; const u32 K45 = -((w1s >> ( 8 + 2)) & 1);
const u32 K49 = pws[gid].i[49]; const u32 K46 = -((w1s >> ( 8 + 1)) & 1);
const u32 K50 = pws[gid].i[50]; const u32 K47 = -((w1s >> ( 8 + 0)) & 1);
const u32 K51 = pws[gid].i[51]; const u32 K48 = -((w1s >> (16 + 7)) & 1);
const u32 K52 = pws[gid].i[52]; const u32 K49 = -((w1s >> (16 + 6)) & 1);
const u32 K53 = pws[gid].i[53]; const u32 K50 = -((w1s >> (16 + 5)) & 1);
const u32 K54 = pws[gid].i[54]; const u32 K51 = -((w1s >> (16 + 4)) & 1);
const u32 K55 = pws[gid].i[55]; const u32 K52 = -((w1s >> (16 + 3)) & 1);
const u32 K53 = -((w1s >> (16 + 2)) & 1);
const u32 K54 = -((w1s >> (16 + 1)) & 1);
const u32 K55 = -((w1s >> (16 + 0)) & 1);
/**
* inner loop
*/
const u32 pc_pos = get_local_id (1); const u32 pc_pos = get_local_id (1);
@ -2486,44 +2500,9 @@ static void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __gl
} }
// //
// transpose bitslice base : easy because no overlapping buffers // transpose bitslice mod : attention race conditions, need different buffers for *in and *out
// mod : attention race conditions, need different buffers for *in and *out
// //
__kernel void m03000_tb (__global pw_t *pws)
{
const u32 gid = get_global_id (0);
const u32 w0s = pws[gid].i[0];
const u32 w1s = pws[gid].i[1];
for (int i = 0; i < 32; i += 8)
{
pws[gid].i[i + 0 + 0] = -((w0s >> (i + 7)) & 1);
pws[gid].i[i + 0 + 1] = -((w0s >> (i + 6)) & 1);
pws[gid].i[i + 0 + 2] = -((w0s >> (i + 5)) & 1);
pws[gid].i[i + 0 + 3] = -((w0s >> (i + 4)) & 1);
pws[gid].i[i + 0 + 4] = -((w0s >> (i + 3)) & 1);
pws[gid].i[i + 0 + 5] = -((w0s >> (i + 2)) & 1);
pws[gid].i[i + 0 + 6] = -((w0s >> (i + 1)) & 1);
pws[gid].i[i + 0 + 7] = -((w0s >> (i + 0)) & 1);
}
for (int i = 0; i < 24; i += 8)
{
pws[gid].i[i + 32 + 0] = -((w1s >> (i + 7)) & 1);
pws[gid].i[i + 32 + 1] = -((w1s >> (i + 6)) & 1);
pws[gid].i[i + 32 + 2] = -((w1s >> (i + 5)) & 1);
pws[gid].i[i + 32 + 3] = -((w1s >> (i + 4)) & 1);
pws[gid].i[i + 32 + 4] = -((w1s >> (i + 3)) & 1);
pws[gid].i[i + 32 + 5] = -((w1s >> (i + 2)) & 1);
pws[gid].i[i + 32 + 6] = -((w1s >> (i + 1)) & 1);
pws[gid].i[i + 32 + 7] = -((w1s >> (i + 0)) & 1);
}
}
__kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r) __kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
{ {
const u32 gid = get_global_id (0); const u32 gid = get_global_id (0);

View File

@ -950,7 +950,6 @@ struct __hc_device_param
cl_kernel kernel_mp_l; cl_kernel kernel_mp_l;
cl_kernel kernel_mp_r; cl_kernel kernel_mp_r;
cl_kernel kernel_amp; cl_kernel kernel_amp;
cl_kernel kernel_tb;
cl_kernel kernel_tm; cl_kernel kernel_tm;
cl_kernel kernel_weak; cl_kernel kernel_weak;
@ -1000,7 +999,6 @@ struct __hc_device_param
void *kernel_params_mp_r[PARAMCNT]; void *kernel_params_mp_r[PARAMCNT];
void *kernel_params_mp_l[PARAMCNT]; void *kernel_params_mp_l[PARAMCNT];
void *kernel_params_amp[PARAMCNT]; void *kernel_params_amp[PARAMCNT];
void *kernel_params_tb[PARAMCNT];
void *kernel_params_tm[PARAMCNT]; void *kernel_params_tm[PARAMCNT];
u32 kernel_params_buf32[PARAMCNT]; u32 kernel_params_buf32[PARAMCNT];

View File

@ -2565,30 +2565,6 @@ static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param,
hc_clFinish (data.ocl, device_param->command_queue); hc_clFinish (data.ocl, device_param->command_queue);
} }
static void run_kernel_tb (hc_device_param_t *device_param, const uint num)
{
uint num_elements = num;
uint kernel_threads = device_param->kernel_threads;
while (num_elements % kernel_threads) num_elements++;
cl_kernel kernel = device_param->kernel_tb;
size_t workgroup_size = 0;
hc_clGetKernelWorkGroupInfo (data.ocl, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &workgroup_size, NULL);
if (kernel_threads > workgroup_size) kernel_threads = workgroup_size;
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
hc_clFlush (data.ocl, device_param->command_queue);
hc_clFinish (data.ocl, device_param->command_queue);
}
static void run_kernel_tm (hc_device_param_t *device_param) static void run_kernel_tm (hc_device_param_t *device_param)
{ {
const uint num_elements = 1024; // fixed const uint num_elements = 1024; // fixed
@ -3114,16 +3090,6 @@ static void run_cracker (hc_device_param_t *device_param, const uint pws_cnt)
+ device_param->kernel_params_mp_l_buf32[5]; + device_param->kernel_params_mp_l_buf32[5];
} }
// bitslice optimization stuff
if (data.attack_mode == ATTACK_MODE_BF)
{
if (data.opts_type & OPTS_TYPE_PT_BITSLICE)
{
run_kernel_tb (device_param, pws_cnt);
}
}
// iteration type // iteration type
uint innerloop_step = 0; uint innerloop_step = 0;
@ -14344,8 +14310,6 @@ int main (int argc, char **argv)
device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5];
device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf32[6]; device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf32[6];
device_param->kernel_params_tb[0] = &device_param->d_pws_buf;
device_param->kernel_params_tm[0] = &device_param->d_bfs_c; device_param->kernel_params_tm[0] = &device_param->d_bfs_c;
device_param->kernel_params_tm[1] = &device_param->d_tm_c; device_param->kernel_params_tm[1] = &device_param->d_tm_c;
@ -14390,10 +14354,6 @@ int main (int argc, char **argv)
{ {
if (opts_type & OPTS_TYPE_PT_BITSLICE) if (opts_type & OPTS_TYPE_PT_BITSLICE)
{ {
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_tb", kern_type);
device_param->kernel_tb = hc_clCreateKernel (data.ocl, device_param->program, kernel_name);
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_tm", kern_type); snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_tm", kern_type);
device_param->kernel_tm = hc_clCreateKernel (data.ocl, device_param->program, kernel_name); device_param->kernel_tm = hc_clCreateKernel (data.ocl, device_param->program, kernel_name);
@ -14456,8 +14416,6 @@ int main (int argc, char **argv)
if (opts_type & OPTS_TYPE_PT_BITSLICE) if (opts_type & OPTS_TYPE_PT_BITSLICE)
{ {
hc_clSetKernelArg (data.ocl, device_param->kernel_tb, 0, sizeof (cl_mem), device_param->kernel_params_tb[0]);
hc_clSetKernelArg (data.ocl, device_param->kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); hc_clSetKernelArg (data.ocl, device_param->kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]);
hc_clSetKernelArg (data.ocl, device_param->kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); hc_clSetKernelArg (data.ocl, device_param->kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]);
} }
@ -16759,7 +16717,6 @@ int main (int argc, char **argv)
if (device_param->kernel_mp) hc_clReleaseKernel (data.ocl, device_param->kernel_mp); if (device_param->kernel_mp) hc_clReleaseKernel (data.ocl, device_param->kernel_mp);
if (device_param->kernel_mp_l) hc_clReleaseKernel (data.ocl, device_param->kernel_mp_l); if (device_param->kernel_mp_l) hc_clReleaseKernel (data.ocl, device_param->kernel_mp_l);
if (device_param->kernel_mp_r) hc_clReleaseKernel (data.ocl, device_param->kernel_mp_r); if (device_param->kernel_mp_r) hc_clReleaseKernel (data.ocl, device_param->kernel_mp_r);
if (device_param->kernel_tb) hc_clReleaseKernel (data.ocl, device_param->kernel_tb);
if (device_param->kernel_tm) hc_clReleaseKernel (data.ocl, device_param->kernel_tm); if (device_param->kernel_tm) hc_clReleaseKernel (data.ocl, device_param->kernel_tm);
if (device_param->kernel_amp) hc_clReleaseKernel (data.ocl, device_param->kernel_amp); if (device_param->kernel_amp) hc_clReleaseKernel (data.ocl, device_param->kernel_amp);