Converted -m 400 to password length 256 support

Something weird happend here, read on!

I've expected some performance drop because this algorithm is using the password data itself inside the iteration loop.
That is different to PBKDF2, which I've converted in mode 2100 before and which did not show any performance as expected.

So after I've finished converting this kernel and testing everything works using the unit test, I did some benchmarks to see how much the
performance drop is.

On my 750ti, the speed dropped (minimal) from 981kH/s -> 948kH/s, that's mostly because of the SIMD support i had to drop.
If I'd turn off the SIMD support in the original, the drop would be even less, that us 967kH/s -> 948kH/s which is a bit of a more reasable
comparison in case we just want to rate the drop that is actually caused by the code change itself.

The drop was acceptable for me, so I've decided to check on my GTX1080.Now the weird thing: The performance increased from 6619kH/s to
7134kH/s!!

When I gave it a second thought, it turned out that:

1. The GTX1080 is a scalar GPU so it wont suffer from the drop of the SIMD code as the 750ti did
2. There's a change in how the global data (password) is read into the registers, it reads only that amount of data it actually needs by using
the pw_len information
3. I've added a barrier for CLK_GLOBAL_MEM_FENCE as it turned out to increase the performance in the 750ti

Note that this kernel is now branched into password length < 40 and larger.

There's a large drop on performance where SIMD is really important, for example CPU.

We could workaround this issue by sticking to SIMD inside the length < 40 branch, but I don't know yet how this can be done efficiently.
pull/1284/head
jsteube 7 years ago
parent 0787b91327
commit 71d4926afa

@ -0,0 +1,707 @@
// important notes on this:
// input buf unused bytes needs to be set to zero
// input buf need to be in algorithm native byte order (md5 = LE, sha1 = BE, etc)
// input buf need to be 64 byte aligned when usin md5_update()
typedef struct md5_ctx
{
u32 h[4];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
int len;
} md5_ctx_t;
void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
MD5_STEP_S (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP_S (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP_S (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP_S (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
void md5_init (md5_ctx_t *ctx)
{
ctx->h[0] = MD5M_A;
ctx->h[1] = MD5M_B;
ctx->h[2] = MD5M_C;
ctx->h[3] = MD5M_D;
ctx->w0[0] = 0;
ctx->w0[1] = 0;
ctx->w0[2] = 0;
ctx->w0[3] = 0;
ctx->w1[0] = 0;
ctx->w1[1] = 0;
ctx->w1[2] = 0;
ctx->w1[3] = 0;
ctx->w2[0] = 0;
ctx->w2[1] = 0;
ctx->w2[2] = 0;
ctx->w2[3] = 0;
ctx->w3[0] = 0;
ctx->w3[1] = 0;
ctx->w3[2] = 0;
ctx->w3[3] = 0;
ctx->len = 0;
}
void md5_update_64 (md5_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
const int pos = ctx->len & 63;
ctx->len += len;
if ((pos + len) < 64)
{
switch_buffer_by_offset_le_S (w0, w1, w2, w3, pos);
ctx->w0[0] |= w0[0];
ctx->w0[1] |= w0[1];
ctx->w0[2] |= w0[2];
ctx->w0[3] |= w0[3];
ctx->w1[0] |= w1[0];
ctx->w1[1] |= w1[1];
ctx->w1[2] |= w1[2];
ctx->w1[3] |= w1[3];
ctx->w2[0] |= w2[0];
ctx->w2[1] |= w2[1];
ctx->w2[2] |= w2[2];
ctx->w2[3] |= w2[3];
ctx->w3[0] |= w3[0];
ctx->w3[1] |= w3[1];
ctx->w3[2] |= w3[2];
ctx->w3[3] |= w3[3];
}
else
{
u32 c0[4] = { 0 };
u32 c1[4] = { 0 };
u32 c2[4] = { 0 };
u32 c3[4] = { 0 };
switch_buffer_by_offset_carry_le_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);
ctx->w0[0] |= w0[0];
ctx->w0[1] |= w0[1];
ctx->w0[2] |= w0[2];
ctx->w0[3] |= w0[3];
ctx->w1[0] |= w1[0];
ctx->w1[1] |= w1[1];
ctx->w1[2] |= w1[2];
ctx->w1[3] |= w1[3];
ctx->w2[0] |= w2[0];
ctx->w2[1] |= w2[1];
ctx->w2[2] |= w2[2];
ctx->w2[3] |= w2[3];
ctx->w3[0] |= w3[0];
ctx->w3[1] |= w3[1];
ctx->w3[2] |= w3[2];
ctx->w3[3] |= w3[3];
md5_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
ctx->w0[0] = c0[0];
ctx->w0[1] = c0[1];
ctx->w0[2] = c0[2];
ctx->w0[3] = c0[3];
ctx->w1[0] = c1[0];
ctx->w1[1] = c1[1];
ctx->w1[2] = c1[2];
ctx->w1[3] = c1[3];
ctx->w2[0] = c2[0];
ctx->w2[1] = c2[1];
ctx->w2[2] = c2[2];
ctx->w2[3] = c2[3];
ctx->w3[0] = c3[0];
ctx->w3[1] = c3[1];
ctx->w3[2] = c3[2];
ctx->w3[3] = c3[3];
}
}
void md5_update (md5_ctx_t *ctx, const u32 *w, const int len)
{
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
int pos1;
int pos4;
for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
{
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_64 (ctx, w0, w1, w2, w3, 64);
}
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_64 (ctx, w0, w1, w2, w3, len - pos1);
}
void md5_update_global (md5_ctx_t *ctx, const __global u32 *w, const int len)
{
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
int pos1;
int pos4;
for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
{
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_64 (ctx, w0, w1, w2, w3, 64);
}
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_64 (ctx, w0, w1, w2, w3, len - pos1);
}
void md5_update_global_utf16le (md5_ctx_t *ctx, const __global u32 *w, const int len)
{
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
int pos1;
int pos4;
for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
{
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
make_utf16le_S (w1, w2, w3);
make_utf16le_S (w0, w0, w1);
md5_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
}
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
make_utf16le_S (w1, w2, w3);
make_utf16le_S (w0, w0, w1);
md5_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
}
void md5_final (md5_ctx_t *ctx)
{
const int pos = ctx->len & 63;
append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
if (pos >= 56)
{
md5_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
ctx->w0[0] = 0;
ctx->w0[1] = 0;
ctx->w0[2] = 0;
ctx->w0[3] = 0;
ctx->w1[0] = 0;
ctx->w1[1] = 0;
ctx->w1[2] = 0;
ctx->w1[3] = 0;
ctx->w2[0] = 0;
ctx->w2[1] = 0;
ctx->w2[2] = 0;
ctx->w2[3] = 0;
ctx->w3[0] = 0;
ctx->w3[1] = 0;
ctx->w3[2] = 0;
ctx->w3[3] = 0;
}
ctx->w3[2] = ctx->len * 8;
ctx->w3[3] = 0;
md5_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
}
void md5_optimize_max_length (md5_ctx_t *ctx, const int bits)
{
ctx->len &= (1 << bits) - 1;
}
// while input buf can be a vector datatype, the length of the different elements can not
typedef struct md5_ctx_vector
{
u32x h[4];
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
int len;
} md5_ctx_vector_t;
void md5_transform_vector (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
{
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
void md5_init_vector (md5_ctx_vector_t *ctx)
{
ctx->h[0] = MD5M_A;
ctx->h[1] = MD5M_B;
ctx->h[2] = MD5M_C;
ctx->h[3] = MD5M_D;
ctx->w0[0] = 0;
ctx->w0[1] = 0;
ctx->w0[2] = 0;
ctx->w0[3] = 0;
ctx->w1[0] = 0;
ctx->w1[1] = 0;
ctx->w1[2] = 0;
ctx->w1[3] = 0;
ctx->w2[0] = 0;
ctx->w2[1] = 0;
ctx->w2[2] = 0;
ctx->w2[3] = 0;
ctx->w3[0] = 0;
ctx->w3[1] = 0;
ctx->w3[2] = 0;
ctx->w3[3] = 0;
ctx->len = 0;
}
void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
const int pos = ctx->len & 63;
ctx->len += len;
if ((pos + len) < 64)
{
switch_buffer_by_offset_le (w0, w1, w2, w3, pos);
ctx->w0[0] |= w0[0];
ctx->w0[1] |= w0[1];
ctx->w0[2] |= w0[2];
ctx->w0[3] |= w0[3];
ctx->w1[0] |= w1[0];
ctx->w1[1] |= w1[1];
ctx->w1[2] |= w1[2];
ctx->w1[3] |= w1[3];
ctx->w2[0] |= w2[0];
ctx->w2[1] |= w2[1];
ctx->w2[2] |= w2[2];
ctx->w2[3] |= w2[3];
ctx->w3[0] |= w3[0];
ctx->w3[1] |= w3[1];
ctx->w3[2] |= w3[2];
ctx->w3[3] |= w3[3];
}
else
{
u32x c0[4] = { 0 };
u32x c1[4] = { 0 };
u32x c2[4] = { 0 };
u32x c3[4] = { 0 };
switch_buffer_by_offset_carry_le (w0, w1, w2, w3, c0, c1, c2, c3, pos);
ctx->w0[0] |= w0[0];
ctx->w0[1] |= w0[1];
ctx->w0[2] |= w0[2];
ctx->w0[3] |= w0[3];
ctx->w1[0] |= w1[0];
ctx->w1[1] |= w1[1];
ctx->w1[2] |= w1[2];
ctx->w1[3] |= w1[3];
ctx->w2[0] |= w2[0];
ctx->w2[1] |= w2[1];
ctx->w2[2] |= w2[2];
ctx->w2[3] |= w2[3];
ctx->w3[0] |= w3[0];
ctx->w3[1] |= w3[1];
ctx->w3[2] |= w3[2];
ctx->w3[3] |= w3[3];
md5_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
ctx->w0[0] = c0[0];
ctx->w0[1] = c0[1];
ctx->w0[2] = c0[2];
ctx->w0[3] = c0[3];
ctx->w1[0] = c1[0];
ctx->w1[1] = c1[1];
ctx->w1[2] = c1[2];
ctx->w1[3] = c1[3];
ctx->w2[0] = c2[0];
ctx->w2[1] = c2[1];
ctx->w2[2] = c2[2];
ctx->w2[3] = c2[3];
ctx->w3[0] = c3[0];
ctx->w3[1] = c3[1];
ctx->w3[2] = c3[2];
ctx->w3[3] = c3[3];
}
}
void md5_update_vector (md5_ctx_vector_t *ctx, const u32x *w, const int len)
{
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
int pos1;
int pos4;
for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
{
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_vector_64 (ctx, w0, w1, w2, w3, 64);
}
w0[0] = w[pos4 + 0];
w0[1] = w[pos4 + 1];
w0[2] = w[pos4 + 2];
w0[3] = w[pos4 + 3];
w1[0] = w[pos4 + 4];
w1[1] = w[pos4 + 5];
w1[2] = w[pos4 + 6];
w1[3] = w[pos4 + 7];
w2[0] = w[pos4 + 8];
w2[1] = w[pos4 + 9];
w2[2] = w[pos4 + 10];
w2[3] = w[pos4 + 11];
w3[0] = w[pos4 + 12];
w3[1] = w[pos4 + 13];
w3[2] = w[pos4 + 14];
w3[3] = w[pos4 + 15];
md5_update_vector_64 (ctx, w0, w1, w2, w3, len - pos1);
}
void md5_final_vector (md5_ctx_vector_t *ctx)
{
const int pos = ctx->len & 63;
append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
if (pos >= 56)
{
md5_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
ctx->w0[0] = 0;
ctx->w0[1] = 0;
ctx->w0[2] = 0;
ctx->w0[3] = 0;
ctx->w1[0] = 0;
ctx->w1[1] = 0;
ctx->w1[2] = 0;
ctx->w1[3] = 0;
ctx->w2[0] = 0;
ctx->w2[1] = 0;
ctx->w2[2] = 0;
ctx->w2[3] = 0;
ctx->w3[0] = 0;
ctx->w3[1] = 0;
ctx->w3[2] = 0;
ctx->w3[3] = 0;
}
ctx->w3[2] = ctx->len * 8;
ctx->w3[3] = 0;
md5_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
}
void md5_optimize_max_length_vector (md5_ctx_vector_t *ctx, const int bits)
{
ctx->len &= (1 << bits) - 1;
}

@ -3,7 +3,7 @@
* License.....: MIT
*/
#define NEW_SIMD_CODE
//#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
@ -11,206 +11,11 @@
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
void md5_transform_S (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = 0;
MD5_STEP_S (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP_S (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP_S (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP_S (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
void md5_transform_V (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
{
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
__kernel void m00400_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
@ -221,86 +26,26 @@ __kernel void m00400_init (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = 0;
w2[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* salt
*/
u32 salt_buf[2];
salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
/**
* init
*/
u32 block_len = 8 + pw_len;
u32 block0[4];
block0[0] = salt_buf[0];
block0[1] = salt_buf[1];
block0[2] = w0[0];
block0[3] = w0[1];
u32 block1[4];
md5_ctx_t md5_ctx;
block1[0] = w0[2];
block1[1] = w0[3];
block1[2] = w1[0];
block1[3] = w1[1];
md5_init (&md5_ctx);
u32 block2[4];
md5_update_global (&md5_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
block2[0] = w1[2];
block2[1] = w1[3];
block2[2] = w2[0];
block2[3] = w2[1];
md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len);
u32 block3[4];
block3[0] = 0;
block3[1] = 0;
block3[2] = block_len * 8;
block3[3] = 0;
append_0x80_4x4_S (block0, block1, block2, block3, block_len);
/**
* init
*/
md5_final (&md5_ctx);
u32 digest[4];
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_transform_S (block0, block1, block2, block3, digest);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
@ -316,87 +61,101 @@ __kernel void m00400_loop (__global pw_t *pws, __global const kernel_rule_t *rul
const u32 gid = get_global_id (0);
if ((gid * VECT_SIZE) >= gid_max) return;
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
u32x w0[4];
u32x w1[4];
u32x w2[4];
const u32 pw_lenv = ceil ((float) pw_len / 4);
w0[0] = packv (pws, i, gid, 0);
w0[1] = packv (pws, i, gid, 1);
w0[2] = packv (pws, i, gid, 2);
w0[3] = packv (pws, i, gid, 3);
w1[0] = packv (pws, i, gid, 4);
w1[1] = packv (pws, i, gid, 5);
w1[2] = packv (pws, i, gid, 6);
w1[3] = packv (pws, i, gid, 7);
w2[0] = packv (pws, i, gid, 8);
w2[1] = packv (pws, i, gid, 9);
w2[2] = 0;
w2[3] = 0;
u32 w[64] = { 0 };
u32x pw_len = packvf (pws, pw_len, gid);
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
u32x digest[4];
u32 digest[4];
digest[0] = packv (tmps, digest_buf, gid, 0);
digest[1] = packv (tmps, digest_buf, gid, 1);
digest[2] = packv (tmps, digest_buf, gid, 2);
digest[3] = packv (tmps, digest_buf, gid, 3);
digest[0] = tmps[gid].digest_buf[0];
digest[1] = tmps[gid].digest_buf[1];
digest[2] = tmps[gid].digest_buf[2];
digest[3] = tmps[gid].digest_buf[3];
/**
* loop
*/
u32x block_len = (16 + pw_len);
u32x block0[4];
u32x block1[4];
u32x block2[4];
u32x block3[4];
block0[0] = 0;
block0[1] = 0;
block0[2] = 0;
block0[3] = 0;
block1[0] = w0[0];
block1[1] = w0[1];
block1[2] = w0[2];
block1[3] = w0[3];
block2[0] = w1[0];
block2[1] = w1[1];
block2[2] = w1[2];
block2[3] = w1[3];
block3[0] = w2[0];
block3[1] = w2[1];
block3[2] = block_len * 8;
block3[3] = 0;
append_0x80_4x4_VV (block0, block1, block2, block3, block_len);
md5_ctx_t md5_ctx;
/**
* init
*/
md5_init (&md5_ctx);
for (u32 i = 0; i < loop_cnt; i++)
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
md5_update (&md5_ctx, w, pw_len);
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
if ((16 + pw_len + 1) >= 56)
{
block0[0] = digest[0];
block0[1] = digest[1];
block0[2] = digest[2];
block0[3] = digest[3];
for (u32 i = 1; i < loop_cnt; i++)
{
md5_init (&md5_ctx);
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_update (&md5_ctx, w, pw_len);
md5_transform_V (block0, block1, block2, block3, digest);
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
}
}
else
{
for (u32 i = 1; i < loop_cnt; i++)
{
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_transform (md5_ctx.w0, md5_ctx.w1, md5_ctx.w2, md5_ctx.w3, digest);
}
}
unpackv (tmps, digest_buf, gid, 0, digest[0]);
unpackv (tmps, digest_buf, gid, 1, digest[1]);
unpackv (tmps, digest_buf, gid, 2, digest[2]);
unpackv (tmps, digest_buf, gid, 3, digest[3]);
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
}
__kernel void m00400_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)

@ -20336,8 +20336,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->kern_type = KERN_TYPE_PHPASS;
hashconfig->dgst_size = DGST_SIZE_4_4;
hashconfig->parse_func = phpass_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE;
hashconfig->dgst_pos0 = 0;
hashconfig->dgst_pos1 = 1;
hashconfig->dgst_pos2 = 2;
@ -24454,6 +24453,8 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
switch (hashconfig->hash_mode)
{
case 400: hashconfig->pw_max = 256;
break;
case 2100: hashconfig->pw_max = 256;
break;
}

@ -53,7 +53,7 @@ my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111,
#my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 2100 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
my %less_fifteen = map { $_ => 1 } qw (500 1600 1800 2400 2410 3200 6300 7400 10500 10700);
my %allow_long_salt = map { $_ => 1 } qw (2100 2500 4520 4521 5500 5600 7100 7200 7300 9400 9500 9600 9700 9800 10400 10500 10600 10700 1100 11000 11200 11300 11400 11600 12600 13500 13800 15000);
my %allow_long_salt = map { $_ => 1 } qw (400 2100 2500 4520 4521 5500 5600 7100 7200 7300 9400 9500 9600 9700 9800 10400 10500 10600 10700 1100 11000 11200 11300 11400 11600 12600 13500 13800 15000);
my @lotus_magic_table =
(
@ -3807,7 +3807,7 @@ sub single
}
elsif ($mode == 111 || $mode == 122 || $mode == 125 || $mode == 131 || $mode == 132 || $mode == 400 || $mode == 500 || $mode == 1600 || $mode == 1722 || $mode == 1731 || $mode == 6300 || $mode == 7900 || $mode == 8100 || $mode == 11100)
{
for (my $i = 1; $i < 32; $i++)
for (my $i = 1; $i < 256; $i++)
{
if ($len != 0)
{

Loading…
Cancel
Save