RC4 Kernels: Improved performance by 20%+ for hash-modes Kerberos 5 (etype 23), MS Office (<= 2003) and PDF (<= 1.6) by using new RC4 code

pull/2593/head^2
Jens Steube 3 years ago
parent 4e565efcf9
commit 8901e657a5

@ -8,21 +8,21 @@
// Pattern linear
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k)
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k)
{
LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S;
return S8[k];
}
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v)
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v)
{
LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S;
S8[k] = v;
}
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v)
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v)
{
S[k] = v;
}
@ -74,7 +74,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v)
#define KEY8(t,k) (((k) & 3) + (((k) / 4) * 128) + (((t) & 31) * 4) + (((t) / 32) * 8192))
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k)
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k)
{
const u64 lid = get_local_id (0);
@ -83,7 +83,7 @@ DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k)
return S8[KEY8 (lid, k)];
}
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v)
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v)
{
const u64 lid = get_local_id (0);
@ -94,7 +94,7 @@ DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v)
#define KEY32(t,k) (((k) * 32) + ((t) & 31) + (((t) / 32) * 2048))
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v)
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v)
{
const u64 lid = get_local_id (0);
@ -114,18 +114,18 @@ DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key)
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
for (u8 i = 0; i < 64; i++)
{
SET_KEY32 (S, i, v); v += a;
}
const u32 d0 = key[0] >> 0;
const u32 d1 = key[0] >> 8;
const u32 d2 = key[0] >> 16;
const u32 d3 = key[0] >> 24;
const u32 d4 = key[1] >> 0;
const u8 d0 = v8a_from_v32_S (key[0]);
const u8 d1 = v8b_from_v32_S (key[0]);
const u8 d2 = v8c_from_v32_S (key[0]);
const u8 d3 = v8d_from_v32_S (key[0]);
const u8 d4 = v8a_from_v32_S (key[1]);
u32 j = 0;
u8 j = 0;
#ifdef _unroll
#pragma unroll
@ -150,16 +150,16 @@ DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key)
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
for (u8 i = 0; i < 64; i++)
{
SET_KEY32 (S, i, v); v += a;
}
u32 j = 0;
u8 j = 0;
for (u32 i = 0; i < 16; i++)
{
u32 idx = i * 16;
u8 idx = i * 16;
u32 v;
@ -204,66 +204,66 @@ DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j)
DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out)
{
u8 src = i;
u8 dst = j;
u8 a = i;
u8 b = j;
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
for (int k = 0; k < 4; k++)
{
u32 xor4 = 0;
u8 idx;
u32 tmp;
u32 r;
u8 idx;
src += 1;
dst += GET_KEY8 (S, src);
a += 1;
b += GET_KEY8 (S, a);
rc4_swap (S, src, dst);
rc4_swap (S, a, b);
idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst);
idx = GET_KEY8 (S, a) + GET_KEY8 (S, b);
r = GET_KEY8 (S, idx);
tmp = GET_KEY8 (S, idx);
xor4 |= r << 0;
xor4 |= tmp << 0;
src += 1;
dst += GET_KEY8 (S, src);
a += 1;
b += GET_KEY8 (S, a);
rc4_swap (S, src, dst);
rc4_swap (S, a, b);
idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst);
idx = GET_KEY8 (S, a) + GET_KEY8 (S, b);
r = GET_KEY8 (S, idx);
tmp = GET_KEY8 (S, idx);
xor4 |= r << 8;
xor4 |= tmp << 8;
src += 1;
dst += GET_KEY8 (S, src);
a += 1;
b += GET_KEY8 (S, a);
rc4_swap (S, src, dst);
rc4_swap (S, a, b);
idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst);
idx = GET_KEY8 (S, a) + GET_KEY8 (S, b);
r = GET_KEY8 (S, idx);
tmp = GET_KEY8 (S, idx);
xor4 |= r << 16;
xor4 |= tmp << 16;
src += 1;
dst += GET_KEY8 (S, src);
a += 1;
b += GET_KEY8 (S, a);
rc4_swap (S, src, dst);
rc4_swap (S, a, b);
idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst);
idx = GET_KEY8 (S, a) + GET_KEY8 (S, b);
r = GET_KEY8 (S, idx);
tmp = GET_KEY8 (S, idx);
xor4 |= r << 24;
xor4 |= tmp << 24;
out[k] = in[k] ^ xor4;
}
return dst;
return b;
}

@ -6,9 +6,9 @@
#ifndef _INC_CIPHER_RC4_H
#define _INC_CIPHER_RC4_H
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k);
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v);
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v);
DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k);
DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v);
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v);
DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key);
DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key);

@ -15,6 +15,7 @@
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -30,129 +31,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state record handed to swap, rc4_init_16 and rc4_next_16 by pointer.
typedef struct
{
u8 S[256]; // state table; every access in the helpers uses an 8-bit index
u32 wtf_its_faster; // never referenced by the helpers below; NOTE(review): the name suggests padding kept only for speed - confirm before removing
} RC4_KEY;
// Exchange the bytes stored at positions i and j of the RC4 state table.
// Both values are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// RC4 key setup for a key of four 32-bit words (16 bytes).
//
// rc4_key: state to initialise; its table S is fully overwritten.
// data:    four key words; each word is consumed low byte first.
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 64 words, each 0x04040404 larger
  // than the previous one, starting at 0x03020100.
  // NOTE(review): this yields bytes 0,1,2,...,255 only on a little-endian
  // device - confirm that is the only supported layout.
  LOCAL_AS u32 *state32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    state32[n] = packed;

    packed += 0x04040404;
  }

  // Key mixing: walk all 256 positions, cycling through the 16 key bytes.
  // j accumulates in 32 bits; only its low 8 bits matter because swap()
  // takes u8 indices.
  u32 j = 0;

  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];

      #ifdef _unroll
      #pragma unroll
      #endif
      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (word >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Generate 16 RC4 output bytes and XOR them onto four 32-bit words.
//
// rc4_key: RC4 state; its table is permuted further by this call.
// i, j:    stream indices to continue from (taken by value).
// in:      four input words.
// out:     receives in[k] ^ x for k = 0..3, where x packs four output bytes
//          with the first generated byte in bits 0..7.
//
// Returns the updated j. The updated i is not returned; it has advanced by 16.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 + u8 is computed as int; storing into u8 wraps it modulo 256.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // Widen to u32 before shifting: a u8 promotes to signed int, and
      // shifting a value >= 0x80 left by 24 would overflow into the sign bit.
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
{
/**
@ -187,9 +65,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -305,11 +181,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -341,7 +217,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -385,7 +261,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -394,7 +270,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -411,7 +287,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -475,9 +351,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -605,11 +479,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -641,7 +515,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -686,7 +560,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -695,7 +569,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -712,7 +586,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -28,129 +29,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state record handed to swap, rc4_init_16 and rc4_next_16 by pointer.
typedef struct
{
u8 S[256]; // state table; every access in the helpers uses an 8-bit index
u32 wtf_its_faster; // never referenced by the helpers below; NOTE(review): the name suggests padding kept only for speed - confirm before removing
} RC4_KEY;
// Exchange the bytes stored at positions i and j of the RC4 state table.
// Both values are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// RC4 key setup for a key of four 32-bit words (16 bytes).
//
// rc4_key: state to initialise; its table S is fully overwritten.
// data:    four key words; each word is consumed low byte first.
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 64 words, each 0x04040404 larger
  // than the previous one, starting at 0x03020100.
  // NOTE(review): this yields bytes 0,1,2,...,255 only on a little-endian
  // device - confirm that is the only supported layout.
  LOCAL_AS u32 *state32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    state32[n] = packed;

    packed += 0x04040404;
  }

  // Key mixing: walk all 256 positions, cycling through the 16 key bytes.
  // j accumulates in 32 bits; only its low 8 bits matter because swap()
  // takes u8 indices.
  u32 j = 0;

  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];

      #ifdef _unroll
      #pragma unroll
      #endif
      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (word >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Generate 16 RC4 output bytes and XOR them onto four 32-bit words.
//
// rc4_key: RC4 state; its table is permuted further by this call.
// i, j:    stream indices to continue from (taken by value).
// in:      four input words.
// out:     receives in[k] ^ x for k = 0..3, where x packs four output bytes
//          with the first generated byte in bits 0..7.
//
// Returns the updated j. The updated i is not returned; it has advanced by 16.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 + u8 is computed as int; storing into u8 wraps it modulo 256.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // Widen to u32 before shifting: a u8 promotes to signed int, and
      // shifting a value >= 0x80 left by 24 would overflow into the sign bit.
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
{
/**
@ -185,9 +63,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -353,11 +229,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -389,7 +265,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -433,7 +309,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -442,7 +318,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -459,7 +335,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -523,9 +399,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -703,11 +577,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -739,7 +613,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -784,7 +658,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -793,7 +667,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -810,7 +684,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{

@ -10,6 +10,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -25,130 +26,7 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state record handed to swap, rc4_init_16 and rc4_next_16 by pointer.
typedef struct
{
u8 S[256]; // state table; every access in the helpers uses an 8-bit index
u32 wtf_its_faster; // never referenced by the helpers below; NOTE(review): the name suggests padding kept only for speed - confirm before removing
} RC4_KEY;
// Exchange the bytes stored at positions i and j of the RC4 state table.
// Both values are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// RC4 key setup for a key of four 32-bit words (16 bytes).
//
// rc4_key: state to initialise; its table S is fully overwritten.
// data:    four key words; each word is consumed low byte first.
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 64 words, each 0x04040404 larger
  // than the previous one, starting at 0x03020100.
  // NOTE(review): this yields bytes 0,1,2,...,255 only on a little-endian
  // device - confirm that is the only supported layout.
  LOCAL_AS u32 *state32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    state32[n] = packed;

    packed += 0x04040404;
  }

  // Key mixing: walk all 256 positions, cycling through the 16 key bytes.
  // j accumulates in 32 bits; only its low 8 bits matter because swap()
  // takes u8 indices.
  u32 j = 0;

  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];

      #ifdef _unroll
      #pragma unroll
      #endif
      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (word >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Generate 16 RC4 output bytes and XOR them onto four 32-bit words.
//
// rc4_key: RC4 state; its table is permuted further by this call.
// i, j:    stream indices to continue from (taken by value).
// in:      four input words.
// out:     receives in[k] ^ x for k = 0..3, where x packs four output bytes
//          with the first generated byte in bits 0..7.
//
// Returns the updated j. The updated i is not returned; it has advanced by 16.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 + u8 is computed as int; storing into u8 wraps it modulo 256.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // Widen to u32 before shifting: a u8 promotes to signed int, and
      // shifting a value >= 0x80 left by 24 would overflow into the sign bit.
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09800m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -157,12 +35,6 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* salt
*/
@ -276,11 +148,11 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0_t[0] = hc_swap32_S (out[0]);
w0_t[1] = hc_swap32_S (out[1]);
@ -312,7 +184,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -356,7 +228,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -365,7 +237,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -382,7 +254,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -404,7 +276,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
}
}
DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09800s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -413,12 +285,6 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* salt
*/
@ -544,11 +410,11 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[3] = 0;
}
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0_t[0] = hc_swap32_S (out[0]);
w0_t[1] = hc_swap32_S (out[1]);
@ -580,7 +446,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
// initial compare
@ -625,7 +491,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -634,7 +500,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3];
j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -651,7 +517,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -715,9 +581,9 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -764,9 +630,9 @@ KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -813,9 +679,9 @@ KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
@ -862,9 +728,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -911,9 +777,9 @@ KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -960,7 +826,7 @@ KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}

@ -15,6 +15,7 @@
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct oldoffice34
@ -28,129 +29,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state record handed to swap, rc4_init_16 and rc4_next_16 by pointer.
typedef struct
{
u8 S[256]; // state table; every access in the helpers uses an 8-bit index
u32 wtf_its_faster; // never referenced by the helpers below; NOTE(review): the name suggests padding kept only for speed - confirm before removing
} RC4_KEY;
// Exchange the bytes stored at positions i and j of the RC4 state table.
// Both values are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// RC4 key setup for a key of four 32-bit words (16 bytes).
//
// rc4_key: state to initialise; its table S is fully overwritten.
// data:    four key words; each word is consumed low byte first.
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 64 words, each 0x04040404 larger
  // than the previous one, starting at 0x03020100.
  // NOTE(review): this yields bytes 0,1,2,...,255 only on a little-endian
  // device - confirm that is the only supported layout.
  LOCAL_AS u32 *state32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    state32[n] = packed;

    packed += 0x04040404;
  }

  // Key mixing: walk all 256 positions, cycling through the 16 key bytes.
  // j accumulates in 32 bits; only its low 8 bits matter because swap()
  // takes u8 indices.
  u32 j = 0;

  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];

      #ifdef _unroll
      #pragma unroll
      #endif
      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (word >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
{
/**
@ -185,9 +63,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* esalt
@ -224,11 +100,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -262,7 +138,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_M_SIMD (out[0], out[1], out[2], out[3]);
}
@ -310,9 +186,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* esalt
@ -361,11 +235,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -399,7 +273,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_S_SIMD (out[0], out[1], out[2], out[3]);
}

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct oldoffice34
@ -26,129 +27,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state block. Callers in this file keep an array of these in local
// memory and pick one entry per work-item by local id.
typedef struct
{
// 256-entry RC4 permutation table, one byte per entry
u8 S[256];
// not referenced by the RC4 helpers below; judging by its name it is
// deliberate padding kept for speed -- TODO confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange table entries i and j of the RC4 state in place.
 * Both indices are u8, so they always fall inside the 256-entry table.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key schedule for a 16-byte key held in four 32-bit words.
 *
 * rc4_key : state to initialise
 * data    : four key words; each word is consumed from its least
 *           significant byte upwards
 *
 * The table is first filled with consecutive byte values, then the 16 key
 * bytes are mixed in, repeated 16 times to cover all 256 positions.
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // fill the table four entries (one u32) per store; on a little-endian
  // device this yields S[n] = n
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  const u32 step = 0x04040404;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = packed;

    packed += step;
  }

  // only the low 8 bits of the accumulator reach swap(), whose index
  // parameters are u8
  u32 acc = 0;

  for (u32 row = 0; row < 16; row++)
  {
    u32 pos = row * 16;

    u32 word;

    word = data[0];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[1];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[2];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[3];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
{
/**
@ -183,9 +61,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* esalt
@ -268,11 +144,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -306,7 +182,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_M_SIMD (out[0], out[1], out[2], out[3]);
}
@ -354,9 +230,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* esalt
@ -451,11 +325,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
w0[0] = hc_swap32 (out[0]);
w0[1] = hc_swap32 (out[1]);
@ -489,7 +363,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_S_SIMD (out[0], out[1], out[2], out[3]);
}

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct oldoffice34
@ -26,130 +27,7 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state block. Callers in this file keep an array of these in local
// memory and pick one entry per work-item by local id.
typedef struct
{
// 256-entry RC4 permutation table, one byte per entry
u8 S[256];
// not referenced by the RC4 helpers below; judging by its name it is
// deliberate padding kept for speed -- TODO confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange table entries i and j of the RC4 state in place.
 * Both indices are u8, so they always fall inside the 256-entry table.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key schedule for a 16-byte key held in four 32-bit words.
 *
 * rc4_key : state to initialise
 * data    : four key words; each word is consumed from its least
 *           significant byte upwards
 *
 * The table is first filled with consecutive byte values, then the 16 key
 * bytes are mixed in, repeated 16 times to cover all 256 positions.
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // fill the table four entries (one u32) per store; on a little-endian
  // device this yields S[n] = n
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  const u32 step = 0x04040404;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = packed;

    packed += step;
  }

  // only the low 8 bits of the accumulator reach swap(), whose index
  // parameters are u8
  u32 acc = 0;

  for (u32 row = 0; row < 16; row++)
  {
    u32 pos = row * 16;

    u32 word;

    word = data[0];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[1];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[2];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[3];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09810m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -158,12 +36,6 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* esalt
*/
@ -194,11 +66,11 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
u32 w0_t[4];
u32 w1_t[4];
@ -237,13 +109,13 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_M_SIMD (out[0], out[1], out[2], out[3]);
}
}
DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09810s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -252,12 +124,6 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* esalt
*/
@ -300,11 +166,11 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
key[2] = 0;
key[3] = 0;
rc4_init_16 (rc4_key, key);
rc4_init_128 (S, key);
u32 out[4];
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out);
u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out);
u32 w0_t[4];
u32 w1_t[4];
@ -343,7 +209,7 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
digest[2] = hc_swap32_S (digest[2]);
digest[3] = hc_swap32_S (digest[3]);
rc4_next_16 (rc4_key, 16, j, digest, out);
rc4_next_16 (S, 16, j, digest, out);
COMPARE_S_SIMD (out[0], out[1], out[2], out[3]);
}
@ -393,9 +259,9 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -442,9 +308,9 @@ KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -491,9 +357,9 @@ KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
@ -540,9 +406,9 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -589,9 +455,9 @@ KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -638,7 +504,7 @@ KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}

@ -15,6 +15,7 @@
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -30,129 +31,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state block. Callers in this file keep an array of these in local
// memory and pick one entry per work-item by local id.
typedef struct
{
// 256-entry RC4 permutation table, one byte per entry
u8 S[256];
// not referenced by the RC4 helpers below; judging by its name it is
// deliberate padding kept for speed -- TODO confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange table entries i and j of the RC4 state in place.
 * Both indices are u8, so they always fall inside the 256-entry table.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key schedule for a 16-byte key held in four 32-bit words.
 *
 * rc4_key : state to initialise
 * data    : four key words; each word is consumed from its least
 *           significant byte upwards
 *
 * The table is first filled with consecutive byte values, then the 16 key
 * bytes are mixed in, repeated 16 times to cover all 256 positions.
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // fill the table four entries (one u32) per store; on a little-endian
  // device this yields S[n] = n
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  const u32 step = 0x04040404;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = packed;

    packed += step;
  }

  // only the low 8 bits of the accumulator reach swap(), whose index
  // parameters are u8
  u32 acc = 0;

  for (u32 row = 0; row < 16; row++)
  {
    u32 pos = row * 16;

    u32 word;

    word = data[0];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[1];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[2];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[3];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
{
/**
@ -187,9 +65,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -327,7 +203,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -338,7 +214,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -355,7 +231,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -419,9 +295,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -570,7 +444,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -581,7 +455,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -598,7 +472,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -28,129 +29,6 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state block. Callers in this file keep an array of these in local
// memory and pick one entry per work-item by local id.
typedef struct
{
// 256-entry RC4 permutation table, one byte per entry
u8 S[256];
// not referenced by the RC4 helpers below; judging by its name it is
// deliberate padding kept for speed -- TODO confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange table entries i and j of the RC4 state in place.
 * Both indices are u8, so they always fall inside the 256-entry table.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key schedule for a 16-byte key held in four 32-bit words.
 *
 * rc4_key : state to initialise
 * data    : four key words; each word is consumed from its least
 *           significant byte upwards
 *
 * The table is first filled with consecutive byte values, then the 16 key
 * bytes are mixed in, repeated 16 times to cover all 256 positions.
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // fill the table four entries (one u32) per store; on a little-endian
  // device this yields S[n] = n
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  const u32 step = 0x04040404;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = packed;

    packed += step;
  }

  // only the low 8 bits of the accumulator reach swap(), whose index
  // parameters are u8
  u32 acc = 0;

  for (u32 row = 0; row < 16; row++)
  {
    u32 pos = row * 16;

    u32 word;

    word = data[0];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[1];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[2];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[3];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
{
/**
@ -185,9 +63,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -375,7 +251,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -386,7 +262,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -403,7 +279,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -467,9 +343,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -668,7 +542,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -679,7 +553,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -696,7 +570,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_rc4.cl"
#endif
#define MIN_NULL_BYTES 10
@ -28,130 +29,7 @@ typedef struct oldoffice34
} oldoffice34_t;
// RC4 state block. Callers in this file keep an array of these in local
// memory and pick one entry per work-item by local id.
typedef struct
{
// 256-entry RC4 permutation table, one byte per entry
u8 S[256];
// not referenced by the RC4 helpers below; judging by its name it is
// deliberate padding kept for speed -- TODO confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange table entries i and j of the RC4 state in place.
 * Both indices are u8, so they always fall inside the 256-entry table.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key schedule for a 16-byte key held in four 32-bit words.
 *
 * rc4_key : state to initialise
 * data    : four key words; each word is consumed from its least
 *           significant byte upwards
 *
 * The table is first filled with consecutive byte values, then the 16 key
 * bytes are mixed in, repeated 16 times to cover all 256 positions.
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // fill the table four entries (one u32) per store; on a little-endian
  // device this yields S[n] = n
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  const u32 step = 0x04040404;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = packed;

    packed += step;
  }

  // only the low 8 bits of the accumulator reach swap(), whose index
  // parameters are u8
  u32 acc = 0;

  for (u32 row = 0; row < 16; row++)
  {
    u32 pos = row * 16;

    u32 word;

    word = data[0];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[1];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[2];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }

    word = data[3];

    for (u32 sh = 0; sh < 32; sh += 8) { acc += rc4_key->S[pos] + (word >> sh); swap (rc4_key, pos, acc); pos++; }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them into four 32-bit words.
 *
 * rc4_key : RC4 state to advance
 * i, j    : stream indices to resume from; i is incremented before each
 *           use, so the first byte produced uses table position i + 1
 * in      : four input words
 * out     : receives in[k] ^ keystream word, where keystream byte n of a
 *           word sits at bit offset 8 * n
 *
 * Returns the updated j so that a following call can continue the stream.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // u8 arithmetic: the sum wraps modulo 256 on assignment
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      // widen before shifting: a u8 promotes to signed int, and in ISO C
      // shifting a value >= 0x80 left by 24 overflows into the sign bit
      xor4 |= ((u32) rc4_key->S[idx]) << shift;
    }

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09820m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -160,12 +38,6 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* salt
*/
@ -299,7 +171,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -310,7 +182,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -327,7 +199,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -349,7 +221,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
}
}
DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
DECLSPEC void m09820s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t))
{
/**
* modifier
@ -358,12 +230,6 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* shared
*/
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* salt
*/
@ -508,7 +374,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
// second block decrypt:
rc4_init_16 (rc4_key, digest);
rc4_init_128 (S, digest);
u32 secondBlockData[4];
@ -519,7 +385,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
u32 out[4];
u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out);
u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out);
int null_bytes = 0;
@ -536,7 +402,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3
secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6];
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7];
rc4_next_16 (rc4_key, 16, j, secondBlockData, out);
rc4_next_16 (S, 16, j, secondBlockData, out);
for (int k = 0; k < 4; k++)
{
@ -600,9 +466,9 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -649,9 +515,9 @@ KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -698,9 +564,9 @@ KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
@ -747,9 +613,9 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t))
@ -796,9 +662,9 @@ KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t))
@ -845,7 +711,7 @@ KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}

@ -9,23 +9,12 @@
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
// Eight 32-bit words, 32 bytes in total.
// NOTE(review): read as little-endian bytes these start 28 BF 4E 5E 4E 75 8A 41,
// which looks like the password padding string of the PDF standard security
// handler - confirm against the PDF reference before relying on this.
CONSTANT_VK u32a padding[8] =
{
0x5e4ebf28,
0x418a754e,
0x564e0064,
0x0801faff,
0xb6002e2e,
0x803e68d0,
0xfea90c2f,
0x7a695364
};
typedef struct pdf
{
int V;
@ -54,132 +43,6 @@ typedef struct pdf14_tmp
} pdf14_tmp_t;
// State for the RC4 helpers in this file.
typedef struct
{
// 256-entry permutation table; rc4_init_16 fills it and every step swaps two entries.
u8 S[256];
// NOTE(review): never read by the code visible here; presumably extra padding kept
// for speed, as the name suggests - confirm before removing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange entries i and j of the S table.
// Both entries are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * Set up the S table from a 16-byte key.
 *
 * rc4_key : state to initialise; S is overwritten
 * data    : the key as four 32-bit words, consumed lowest byte first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S four bytes per 32-bit store; on a little-endian target this
  // yields S[n] = n for n = 0..255.

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Mix the key in: the 16 key bytes are reused for each run of 16 positions.
  // The shifted word is not masked to one byte; only the low 8 bits of j
  // matter because swap () takes its indices as u8.

  u32 j = 0;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 pos = 0; pos < 256; pos += 16)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];
      const u32 base = pos + (w * 4);

      for (u32 b = 0; b < 4; b++)
      {
        const u32 at = base + b;

        j += rc4_key->S[at] + (word >> (b * 8));

        swap (rc4_key, at, j);
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four input words.
 *
 * rc4_key : cipher state; its S table is permuted in place
 * i, j    : stream indices before this call; being u8 they wrap modulo 256
 * in      : four input words
 * out     : receives in[k] ^ keystream word k, for k = 0..3
 *
 * Keystream bytes are packed into each word lowest byte first.
 *
 * Returns the updated j. The updated i is not returned (it is always the
 * incoming i plus 16, modulo 256).
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S byte is widened to u32 before shifting: a plain u8 promotes to
    // signed int, and shifting a value >= 0x80 left by 24 overflows it.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
{
/**
@ -207,13 +70,6 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
const u32 pw_len = pws[gid].pw_len;
/**
* shared
*/
//LOCAL_AS RC4_KEY rc4_keys[64];
//LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
/**
* U_buf
*/
@ -283,6 +139,18 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
// max length supported by pdf11 is 32
const u32 padding[8] =
{
0x5e4ebf28,
0x418a754e,
0x564e0064,
0x0801faff,
0xb6002e2e,
0x803e68d0,
0xfea90c2f,
0x7a695364
};
w0_t[0] = padding[0];
w0_t[1] = padding[1];
w0_t[2] = padding[2];
@ -377,9 +245,7 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* loop
@ -448,9 +314,9 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
tmp[2] = digest[2] ^ xv;
tmp[3] = digest[3] ^ xv;
rc4_init_16 (rc4_key, tmp);
rc4_init_128 (S, tmp);
rc4_next_16 (rc4_key, 0, 0, out, out);
rc4_next_16 (S, 0, 0, out, out);
}
}

@ -16,6 +16,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -27,129 +28,6 @@ typedef struct krb5tgs
} krb5tgs_t;
// State for the RC4 helpers in this file.
typedef struct
{
// 256-entry permutation table; rc4_init_16 fills it and every step swaps two entries.
u8 S[256];
// NOTE(review): never read by the code visible here; presumably extra padding kept
// for speed, as the name suggests - confirm before removing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange entries i and j of the S table.
// Both entries are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * Set up the S table from a 16-byte key.
 *
 * rc4_key : state to initialise; S is overwritten
 * data    : the key as four 32-bit words, consumed lowest byte first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S four bytes per 32-bit store; on a little-endian target this
  // yields S[n] = n for n = 0..255.

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Mix the key in: the 16 key bytes are reused for each run of 16 positions.
  // The shifted word is not masked to one byte; only the low 8 bits of j
  // matter because swap () takes its indices as u8.

  u32 j = 0;

  for (u32 pos = 0; pos < 256; pos += 16)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];
      const u32 base = pos + (w * 4);

      for (u32 b = 0; b < 4; b++)
      {
        const u32 at = base + b;

        j += rc4_key->S[at] + (word >> (b * 8));

        swap (rc4_key, at, j);
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four input words.
 *
 * rc4_key : cipher state; its S table is permuted in place
 * i, j    : stream indices before this call; being u8 they wrap modulo 256
 * in      : four input words
 * out     : receives in[k] ^ keystream word k, for k = 0..3
 *
 * Keystream bytes are packed into each word lowest byte first.
 *
 * Returns the updated j. The updated i is not returned (it is always the
 * incoming i plus 16, modulo 256).
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S byte is widened to u32 before shifting: a plain u8 promotes to
    // signed int, and shifting a value >= 0x80 left by 24 overflows it.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -256,15 +134,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -302,10 +180,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -329,7 +207,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -342,8 +220,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -356,9 +234,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -371,10 +249,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -610,9 +488,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -655,7 +531,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -708,9 +584,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -753,7 +627,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -15,6 +15,7 @@
#include "inc_rp.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -26,132 +27,9 @@ typedef struct krb5tgs
} krb5tgs_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
// Exchange entries i and j of the S table.
// Both entries are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * Set up the S table from a 16-byte key.
 *
 * rc4_key : state to initialise; S is overwritten
 * data    : the key as four 32-bit words, consumed lowest byte first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S four bytes per 32-bit store; on a little-endian target this
  // yields S[n] = n for n = 0..255.

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Mix the key in: the 16 key bytes are reused for each run of 16 positions.
  // The shifted word is not masked to one byte; only the low 8 bits of j
  // matter because swap () takes its indices as u8.

  u32 j = 0;

  for (u32 pos = 0; pos < 256; pos += 16)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];
      const u32 base = pos + (w * 4);

      for (u32 b = 0; b < 4; b++)
      {
        const u32 at = base + b;

        j += rc4_key->S[at] + (word >> (b * 8));

        swap (rc4_key, at, j);
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four input words.
 *
 * rc4_key : cipher state; its S table is permuted in place
 * i, j    : stream indices before this call; being u8 they wrap modulo 256
 * in      : four input words
 * out     : receives in[k] ^ keystream word k, for k = 0..3
 *
 * Keystream bytes are packed into each word lowest byte first.
 *
 * Returns the updated j. The updated i is not returned (it is always the
 * incoming i plus 16, modulo 256).
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S byte is widened to u32 before shifting: a plain u8 promotes to
    // signed int, and shifting a value >= 0x80 left by 24 overflows it.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -170,15 +48,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -215,10 +93,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -242,31 +120,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -404,9 +282,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
COPY_PW (pws[gid]);
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -439,7 +315,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -466,9 +342,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
COPY_PW (pws[gid]);
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -501,7 +375,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -14,6 +14,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -25,129 +26,6 @@ typedef struct krb5tgs
} krb5tgs_t;
// State for the RC4 helpers in this file.
typedef struct
{
// 256-entry permutation table; rc4_init_16 fills it and every step swaps two entries.
u8 S[256];
// NOTE(review): never read by the code visible here; presumably extra padding kept
// for speed, as the name suggests - confirm before removing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange entries i and j of the S table.
// Both entries are read before either slot is written, so i == j is a no-op.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * Set up the S table from a 16-byte key.
 *
 * rc4_key : state to initialise; S is overwritten
 * data    : the key as four 32-bit words, consumed lowest byte first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S four bytes per 32-bit store; on a little-endian target this
  // yields S[n] = n for n = 0..255.

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Mix the key in: the 16 key bytes are reused for each run of 16 positions.
  // The shifted word is not masked to one byte; only the low 8 bits of j
  // matter because swap () takes its indices as u8.

  u32 j = 0;

  for (u32 pos = 0; pos < 256; pos += 16)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 word = data[w];
      const u32 base = pos + (w * 4);

      for (u32 b = 0; b < 4; b++)
      {
        const u32 at = base + b;

        j += rc4_key->S[at] + (word >> (b * 8));

        swap (rc4_key, at, j);
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four input words.
 *
 * rc4_key : cipher state; its S table is permuted in place
 * i, j    : stream indices before this call; being u8 they wrap modulo 256
 * in      : four input words
 * out     : receives in[k] ^ keystream word k, for k = 0..3
 *
 * Keystream bytes are packed into each word lowest byte first.
 *
 * Returns the updated j. The updated i is not returned (it is always the
 * incoming i plus 16, modulo 256).
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S byte is widened to u32 before shifting: a plain u8 promotes to
    // signed int, and shifting a value >= 0x80 left by 24 overflows it.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -607,9 +485,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -702,7 +578,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -754,9 +630,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -849,7 +723,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -24,132 +25,9 @@ typedef struct krb5tgs
} krb5tgs_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange the S-box bytes stored at positions i and j.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  // read both entries first, then write them back crossed over
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key setup for a 16-byte key.
 *
 * rc4_key: state to initialise; S is overwritten completely
 * data:    four 32-bit words holding the key; within each word the lowest
 *          byte is consumed first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S with 0x00, 0x01, ... four bytes per store: each word is the
  // previous one plus 4 in every byte lane.
  // NOTE(review): S[k] == k only holds if the u32 store is little-endian - confirm for all targets
  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Key scheduling: 16 passes over the 16 key bytes cover all 256 positions.
  // pos and j are kept as u32 here; swap () takes u8 arguments, so only
  // their low 8 bits are used.
  u32 j = 0;

  u32 pos = 0;

  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 key4 = data[w];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (key4 >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four 32-bit words.
 *
 * rc4_key: S-box state, permuted in place while the keystream is produced
 * i, j:    RC4 stream indices on entry (8-bit, so they wrap modulo 256)
 * in:      four input words
 * out:     receives in[k] ^ keystream word for k = 0..3
 *
 * Returns the updated j. i is advanced only in the local copy, so the caller
 * has to add 16 to its own i after every call.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S-box byte is widened to u32 before it is shifted. Left as u8 it
    // would be promoted to a signed int, and shifting a byte >= 0x80 by 24
    // would move a set bit into the sign bit.

    // keystream byte 0 -> bits 0..7
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j]; // sum is cut back to 8 bits by the u8 assignment
    xor4 |= ((u32) rc4_key->S[idx]) <<  0;

    // keystream byte 1 -> bits 8..15
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) <<  8;

    // keystream byte 2 -> bits 16..23
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 16;

    // keystream byte 3 -> bits 24..31
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -400,9 +278,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t))
* base
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -435,7 +311,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -460,9 +336,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t))
* base
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -495,7 +369,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -14,6 +14,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -25,129 +26,6 @@ typedef struct krb5tgs
} krb5tgs_t;
/**
 * RC4 cipher state. The kernels in this file declare a LOCAL_VK array of
 * these and hand each work-item the element at its local id.
 */
typedef struct
{
// RC4 permutation table, one byte per entry
u8 S[256];
// Not referenced by any code visible in this file.
// NOTE(review): the name suggests the extra word is kept only because it made the kernel faster - confirm before removing
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange the S-box bytes stored at positions i and j.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  // read both entries first, then write them back crossed over
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key setup for a 16-byte key.
 *
 * rc4_key: state to initialise; S is overwritten completely
 * data:    four 32-bit words holding the key; within each word the lowest
 *          byte is consumed first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S with 0x00, 0x01, ... four bytes per store: each word is the
  // previous one plus 4 in every byte lane.
  // NOTE(review): S[k] == k only holds if the u32 store is little-endian - confirm for all targets
  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Key scheduling: 16 passes over the 16 key bytes cover all 256 positions.
  // pos and j are kept as u32 here; swap () takes u8 arguments, so only
  // their low 8 bits are used.
  u32 j = 0;

  u32 pos = 0;

  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 key4 = data[w];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (key4 >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four 32-bit words.
 *
 * rc4_key: S-box state, permuted in place while the keystream is produced
 * i, j:    RC4 stream indices on entry (8-bit, so they wrap modulo 256)
 * in:      four input words
 * out:     receives in[k] ^ keystream word for k = 0..3
 *
 * Returns the updated j. i is advanced only in the local copy, so the caller
 * has to add 16 to its own i after every call.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S-box byte is widened to u32 before it is shifted. Left as u8 it
    // would be promoted to a signed int, and shifting a byte >= 0x80 by 24
    // would move a set bit into the sign bit.

    // keystream byte 0 -> bits 0..7
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j]; // sum is cut back to 8 bits by the u8 assignment
    xor4 |= ((u32) rc4_key->S[idx]) <<  0;

    // keystream byte 1 -> bits 8..15
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) <<  8;

    // keystream byte 2 -> bits 16..23
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 16;

    // keystream byte 3 -> bits 24..31
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -573,7 +451,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
}
DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t))
DECLSPEC void m13100 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t))
{
/**
* modifier
@ -622,7 +500,7 @@ DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -677,11 +555,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t))
@ -729,11 +605,9 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m13100_m16 (KERN_ATTR_ESALT (krb5tgs_t))
@ -785,11 +659,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t))
@ -837,11 +709,9 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m13100_s16 (KERN_ATTR_ESALT (krb5tgs_t))

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5tgs
@ -24,132 +25,9 @@ typedef struct krb5tgs
} krb5tgs_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
/**
 * Exchange the S-box bytes stored at positions i and j.
 */
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  // read both entries first, then write them back crossed over
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
/**
 * RC4 key setup for a 16-byte key.
 *
 * rc4_key: state to initialise; S is overwritten completely
 * data:    four 32-bit words holding the key; within each word the lowest
 *          byte is consumed first
 */
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Fill S with 0x00, 0x01, ... four bytes per store: each word is the
  // previous one plus 4 in every byte lane.
  // NOTE(review): S[k] == k only holds if the u32 store is little-endian - confirm for all targets
  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Key scheduling: 16 passes over the 16 key bytes cover all 256 positions.
  // pos and j are kept as u32 here; swap () takes u8 arguments, so only
  // their low 8 bits are used.
  u32 j = 0;

  u32 pos = 0;

  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 w = 0; w < 4; w++)
    {
      const u32 key4 = data[w];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        j += rc4_key->S[pos] + (key4 >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
/**
 * Generate 16 RC4 keystream bytes and XOR them onto four 32-bit words.
 *
 * rc4_key: S-box state, permuted in place while the keystream is produced
 * i, j:    RC4 stream indices on entry (8-bit, so they wrap modulo 256)
 * in:      four input words
 * out:     receives in[k] ^ keystream word for k = 0..3
 *
 * Returns the updated j. i is advanced only in the local copy, so the caller
 * has to add 16 to its own i after every call.
 */
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // Each S-box byte is widened to u32 before it is shifted. Left as u8 it
    // would be promoted to a signed int, and shifting a byte >= 0x80 by 24
    // would move a set bit into the sign bit.

    // keystream byte 0 -> bits 0..7
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j]; // sum is cut back to 8 bits by the u8 assignment
    xor4 |= ((u32) rc4_key->S[idx]) <<  0;

    // keystream byte 1 -> bits 8..15
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) <<  8;

    // keystream byte 2 -> bits 16..23
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 16;

    // keystream byte 3 -> bits 24..31
    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= ((u32) rc4_key->S[idx]) << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
u32 out1[4];
@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
next headers follow the same ASN1 "type-length-data" scheme
*/
j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16;
if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0;
j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16;
j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16;
if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
i = 0;
j = 0;
@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -409,9 +287,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
w[idx] = pws[gid].i[idx];
}
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -448,7 +324,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -482,9 +358,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
w[idx] = pws[gid].i[idx];
}
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -521,7 +395,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -16,6 +16,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -27,129 +28,6 @@ typedef struct krb5asrep
} krb5asrep_t;
// Per-work-item RC4 cipher state used by swap(), rc4_init_16() and rc4_next_16().
typedef struct
{
// 256-entry byte table that the RC4 routines index and permute.
u8 S[256];
// Not referenced by any of the RC4 routines next to this struct.
// NOTE(review): the name suggests it is padding kept only because it
// measured faster - confirm before removing or resizing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange the entries S[i] and S[j] of the RC4 table held in rc4_key.
// When i == j the table is left unchanged.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// Run the RC4 key schedule over rc4_key->S using a 16-byte key.
//
//   rc4_key  state whose S table is overwritten
//   data     four 32-bit words supplying the key; each word is consumed
//            starting with its least significant byte
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 0x03020100, 0x07060504, ...
  // (bytes 0x00..0xff in order when the words are stored little-endian).
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 fill = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = fill;

    fill += 0x04040404;
  }

  u32 j   = 0;
  u32 pos = 0;

  // 16 passes over the 16 key bytes visit all 256 table positions once.
  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 word = 0; word < 4; word++)
    {
      const u32 kw = data[word];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        // j accumulates as u32; swap() takes u8 indices, so only its
        // low 8 bits select the table entry.
        j += rc4_key->S[pos] + (kw >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Produce 16 RC4 keystream bytes and XOR them onto four input words.
//
//   rc4_key  RC4 state; its S table is permuted as the stream advances
//   i, j     RC4 stream indices on entry (passed by value)
//   in       four 32-bit words to be XORed with the keystream
//   out      receives the four resulting words
//
// Returns the updated j. The updated i is not returned.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 keystream = 0;

    // One keystream byte per step, packed from the lowest byte upwards.
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // The sum wraps to 8 bits before it is used as a table index.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      keystream |= rc4_key->S[idx] << shift;
    }

    out[k] = in[k] ^ keystream;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -255,14 +133,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -608,9 +486,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -653,7 +529,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -706,9 +582,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -751,7 +625,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -15,6 +15,7 @@
#include "inc_rp.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -26,132 +27,9 @@ typedef struct krb5asrep
} krb5asrep_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
// Exchange the entries S[i] and S[j] of the RC4 table held in rc4_key.
// When i == j the table is left unchanged.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// Run the RC4 key schedule over rc4_key->S using a 16-byte key.
//
//   rc4_key  state whose S table is overwritten
//   data     four 32-bit words supplying the key; each word is consumed
//            starting with its least significant byte
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 0x03020100, 0x07060504, ...
  // (bytes 0x00..0xff in order when the words are stored little-endian).
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 fill = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = fill;

    fill += 0x04040404;
  }

  u32 j   = 0;
  u32 pos = 0;

  // 16 passes over the 16 key bytes visit all 256 table positions once.
  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 word = 0; word < 4; word++)
    {
      const u32 kw = data[word];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        // j accumulates as u32; swap() takes u8 indices, so only its
        // low 8 bits select the table entry.
        j += rc4_key->S[pos] + (kw >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Produce 16 RC4 keystream bytes and XOR them onto four input words.
//
//   rc4_key  RC4 state; its S table is permuted as the stream advances
//   i, j     RC4 stream indices on entry (passed by value)
//   in       four 32-bit words to be XORed with the keystream
//   out      receives the four resulting words
//
// Returns the updated j. The updated i is not returned.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 keystream = 0;

    // One keystream byte per step, packed from the lowest byte upwards.
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // The sum wraps to 8 bits before it is used as a table index.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      keystream |= rc4_key->S[idx] << shift;
    }

    out[k] = in[k] ^ keystream;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -169,14 +47,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -402,9 +280,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
COPY_PW (pws[gid]);
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -437,7 +313,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -464,9 +340,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
COPY_PW (pws[gid]);
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -499,7 +373,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -14,6 +14,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -25,129 +26,6 @@ typedef struct krb5asrep
} krb5asrep_t;
// Per-work-item RC4 cipher state used by swap(), rc4_init_16() and rc4_next_16().
typedef struct
{
// 256-entry byte table that the RC4 routines index and permute.
u8 S[256];
// Not referenced by any of the RC4 routines next to this struct.
// NOTE(review): the name suggests it is padding kept only because it
// measured faster - confirm before removing or resizing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange the entries S[i] and S[j] of the RC4 table held in rc4_key.
// When i == j the table is left unchanged.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// Run the RC4 key schedule over rc4_key->S using a 16-byte key.
//
//   rc4_key  state whose S table is overwritten
//   data     four 32-bit words supplying the key; each word is consumed
//            starting with its least significant byte
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 0x03020100, 0x07060504, ...
  // (bytes 0x00..0xff in order when the words are stored little-endian).
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 fill = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = fill;

    fill += 0x04040404;
  }

  u32 j   = 0;
  u32 pos = 0;

  // 16 passes over the 16 key bytes visit all 256 table positions once.
  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 word = 0; word < 4; word++)
    {
      const u32 kw = data[word];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        // j accumulates as u32; swap() takes u8 indices, so only its
        // low 8 bits select the table entry.
        j += rc4_key->S[pos] + (kw >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Produce 16 RC4 keystream bytes and XOR them onto four input words.
//
//   rc4_key  RC4 state; its S table is permuted as the stream advances
//   i, j     RC4 stream indices on entry (passed by value)
//   in       four 32-bit words to be XORed with the keystream
//   out      receives the four resulting words
//
// Returns the updated j. The updated i is not returned.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 keystream = 0;

    // One keystream byte per step, packed from the lowest byte upwards.
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // The sum wraps to 8 bits before it is used as a table index.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      keystream |= rc4_key->S[idx] << shift;
    }

    out[k] = in[k] ^ keystream;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -605,9 +483,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -700,7 +576,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -752,9 +628,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* salt
@ -847,7 +721,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -24,132 +25,9 @@ typedef struct krb5asrep
} krb5asrep_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
// Exchange the entries S[i] and S[j] of the RC4 table held in rc4_key.
// When i == j the table is left unchanged.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// Run the RC4 key schedule over rc4_key->S using a 16-byte key.
//
//   rc4_key  state whose S table is overwritten
//   data     four 32-bit words supplying the key; each word is consumed
//            starting with its least significant byte
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 0x03020100, 0x07060504, ...
  // (bytes 0x00..0xff in order when the words are stored little-endian).
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 fill = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = fill;

    fill += 0x04040404;
  }

  u32 j   = 0;
  u32 pos = 0;

  // 16 passes over the 16 key bytes visit all 256 table positions once.
  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 word = 0; word < 4; word++)
    {
      const u32 kw = data[word];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        // j accumulates as u32; swap() takes u8 indices, so only its
        // low 8 bits select the table entry.
        j += rc4_key->S[pos] + (kw >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Produce 16 RC4 keystream bytes and XOR them onto four input words.
//
//   rc4_key  RC4 state; its S table is permuted as the stream advances
//   i, j     RC4 stream indices on entry (passed by value)
//   in       four 32-bit words to be XORed with the keystream
//   out      receives the four resulting words
//
// Returns the updated j. The updated i is not returned.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 keystream = 0;

    // One keystream byte per step, packed from the lowest byte upwards.
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // The sum wraps to 8 bits before it is used as a table index.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      keystream |= rc4_key->S[idx] << shift;
    }

    out[k] = in[k] ^ keystream;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -398,9 +276,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t))
* base
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -433,7 +309,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -458,9 +334,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t))
* base
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -493,7 +367,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -14,6 +14,7 @@
#include "inc_simd.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -25,129 +26,6 @@ typedef struct krb5asrep
} krb5asrep_t;
// Per-work-item RC4 cipher state used by swap(), rc4_init_16() and rc4_next_16().
typedef struct
{
// 256-entry byte table that the RC4 routines index and permute.
u8 S[256];
// Not referenced by any of the RC4 routines next to this struct.
// NOTE(review): the name suggests it is padding kept only because it
// measured faster - confirm before removing or resizing.
u32 wtf_its_faster;
} RC4_KEY;
// Exchange the entries S[i] and S[j] of the RC4 table held in rc4_key.
// When i == j the table is left unchanged.
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
// Run the RC4 key schedule over rc4_key->S using a 16-byte key.
//
//   rc4_key  state whose S table is overwritten
//   data     four 32-bit words supplying the key; each word is consumed
//            starting with its least significant byte
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  // Seed the table through 32-bit stores: 0x03020100, 0x07060504, ...
  // (bytes 0x00..0xff in order when the words are stored little-endian).
  LOCAL_AS u32 *table32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 fill = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    table32[n] = fill;

    fill += 0x04040404;
  }

  u32 j   = 0;
  u32 pos = 0;

  // 16 passes over the 16 key bytes visit all 256 table positions once.
  for (u32 pass = 0; pass < 16; pass++)
  {
    for (u32 word = 0; word < 4; word++)
    {
      const u32 kw = data[word];

      for (u32 shift = 0; shift < 32; shift += 8)
      {
        // j accumulates as u32; swap() takes u8 indices, so only its
        // low 8 bits select the table entry.
        j += rc4_key->S[pos] + (kw >> shift);

        swap (rc4_key, pos, j);

        pos++;
      }
    }
  }
}
// Produce 16 RC4 keystream bytes and XOR them onto four input words.
//
//   rc4_key  RC4 state; its S table is permuted as the stream advances
//   i, j     RC4 stream indices on entry (passed by value)
//   in       four 32-bit words to be XORed with the keystream
//   out      receives the four resulting words
//
// Returns the updated j. The updated i is not returned.
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 keystream = 0;

    // One keystream byte per step, packed from the lowest byte upwards.
    for (u32 shift = 0; shift < 32; shift += 8)
    {
      i += 1;
      j += rc4_key->S[i];

      swap (rc4_key, i, j);

      // The sum wraps to 8 bits before it is used as a table index.
      const u8 idx = rc4_key->S[i] + rc4_key->S[j];

      keystream |= rc4_key->S[idx] << shift;
    }

    out[k] = in[k] ^ keystream;
  }

  return j;
}
DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad)
{
w0[0] = w0[0] ^ 0x36363636;
@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *
md5_transform (w0, w1, w2, w3, digest);
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_transform (w0, w1, w2, w3, ipad);
}
@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
@ -571,7 +449,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
}
DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t))
DECLSPEC void m18200 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t))
{
/**
* modifier
@ -620,7 +498,7 @@ DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32
tmp[2] = digest[2];
tmp[3] = digest[3];
if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -675,11 +553,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t))
@ -727,11 +603,9 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m18200_m16 (KERN_ATTR_ESALT (krb5asrep_t))
@ -783,11 +657,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t))
@ -835,11 +707,9 @@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t))
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
KERNEL_FQ void m18200_s16 (KERN_ATTR_ESALT (krb5asrep_t))

@ -13,6 +13,7 @@
#include "inc_common.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
typedef struct krb5asrep
@ -24,132 +25,9 @@ typedef struct krb5asrep
} krb5asrep_t;
typedef struct
DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
u8 S[256];
u32 wtf_its_faster;
} RC4_KEY;
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  // Exchange permutation entries S[i] and S[j]; a no-op when i == j.

  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  /**
   * RC4 key setup for a 16 byte key passed as the four words data[0..3]
   */

  // Step 1: fill S four bytes per store with 0x03020100, 0x07060504, ...
  // (yields S[n] = n when u32 stores are little-endian)

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Step 2: key mixing over all 256 positions; the 16 key bytes repeat
  // for every run of 16 positions

  u32 j = 0;

  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    for (u32 w = 0; w < 4; w++)
    {
      const u32 key_word = data[w];

      // swap() takes u8 indices, so pos and j are truncated to 8 bit at each call
      j += rc4_key->S[pos] + (key_word >>  0); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >>  8); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >> 16); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >> 24); swap (rc4_key, pos, j); pos++;
    }
  }
}
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out)
{
  /**
   * Produces 16 RC4 keystream bytes and stores in[0..3] XOR keystream to out[0..3].
   *
   * rc4_key: RC4 state, updated in place
   * i, j   : RC4 indices to continue from (callers pass 0, 0 for a fresh state)
   * in     : four input words
   * out    : four output words; each in[k] is read before out[k] is written
   *
   * Returns the updated j. i is advanced by 16 internally but not returned;
   * callers add 16 to their own copy.
   */

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // The (u32) casts keep the shifts unsigned: without them the u8 is
    // promoted to int and "<< 24" would shift a set bit into the sign bit.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] <<  0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] <<  8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum)
{
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u32 out0[4];
@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE)
*/
rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0);
rc4_next_16 (S, 0, 0, edata2 + 0, out0);
if (((out0[2] & 0x00ff80ff) != 0x00300079) &&
((out0[2] & 0xFF00FFFF) != 0x30008179) &&
((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030))
return 0;
rc4_init_16 (rc4_key, data);
rc4_init_128 (S, data);
u8 i = 0;
u8 j = 0;
@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
}
@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS
if (edata2_left < 16)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w0, edata2_left & 0xf);
}
else if (edata2_left < 32)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w1, edata2_left & 0xf);
}
else if (edata2_left < 48)
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w2, edata2_left & 0xf);
}
else
{
j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4;
j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4;
truncate_block_4x4_le_S (w3, edata2_left & 0xf);
}
@ -407,9 +285,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
w[idx] = pws[gid].i[idx];
}
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -446,7 +322,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{
@ -480,9 +356,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
w[idx] = pws[gid].i[idx];
}
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
u32 checksum[4];
@ -519,7 +393,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t))
kerb_prepare (ctx.h, checksum, digest, K2);
if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1)
{
if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0)
{

@ -12,23 +12,12 @@
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_hash_md5.cl"
#include "inc_cipher_rc4.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
// Eight 32-bit words which, read as little-endian bytes, spell the 32-byte
// password padding string of the PDF standard security handler
// (28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08
//  2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A).
CONSTANT_VK u32a padding[8] =
{
0x5e4ebf28,
0x418a754e,
0x564e0064,
0x0801faff,
0xb6002e2e,
0x803e68d0,
0xfea90c2f,
0x7a695364
};
typedef struct pdf
{
int V;
@ -57,132 +46,6 @@ typedef struct pdf14_tmp
} pdf14_tmp_t;
// RC4 state for one work-item; the loop kernel in this file keeps an array of
// these in local memory and hands each work-item its own entry.
typedef struct
{
// RC4 permutation table, initialised and key-mixed by rc4_init_16() and
// advanced by rc4_next_16().
u8 S[256];
// Not referenced by any RC4 helper visible here; judging by the name it is
// presumably padding kept for performance reasons -- TODO confirm before removing.
u32 wtf_its_faster;
} RC4_KEY;
DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j)
{
  // Exchange permutation entries S[i] and S[j]; a no-op when i == j.

  const u8 at_i = rc4_key->S[i];
  const u8 at_j = rc4_key->S[j];

  rc4_key->S[i] = at_j;
  rc4_key->S[j] = at_i;
}
DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data)
{
  /**
   * RC4 key setup for a 16 byte key passed as the four words data[0..3]
   */

  // Step 1: fill S four bytes per store with 0x03020100, 0x07060504, ...
  // (yields S[n] = n when u32 stores are little-endian)

  LOCAL_AS u32 *S32 = (LOCAL_AS u32 *) rc4_key->S;

  u32 packed = 0x03020100;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 n = 0; n < 64; n++)
  {
    S32[n] = packed;

    packed += 0x04040404;
  }

  // Step 2: key mixing over all 256 positions; the 16 key bytes repeat
  // for every run of 16 positions

  u32 j = 0;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 base = 0; base < 256; base += 16)
  {
    u32 pos = base;

    for (u32 w = 0; w < 4; w++)
    {
      const u32 key_word = data[w];

      // swap() takes u8 indices, so pos and j are truncated to 8 bit at each call
      j += rc4_key->S[pos] + (key_word >>  0); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >>  8); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >> 16); swap (rc4_key, pos, j); pos++;
      j += rc4_key->S[pos] + (key_word >> 24); swap (rc4_key, pos, j); pos++;
    }
  }
}
DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out)
{
  /**
   * Produces 16 RC4 keystream bytes and stores in[0..3] XOR keystream to out[0..3].
   *
   * rc4_key: RC4 state, updated in place
   * i, j   : RC4 indices to continue from (0, 0 for a freshly initialised state)
   * in     : four input words
   * out    : four output words; may be the same buffer as in, because each
   *          in[k] is read before out[k] is written
   *
   * Returns the updated j. i is advanced by 16 internally but not returned.
   */

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 k = 0; k < 4; k++)
  {
    u32 xor4 = 0;

    u8 idx;

    // The (u32) casts keep the shifts unsigned: without them the u8 is
    // promoted to int and "<< 24" would shift a set bit into the sign bit.

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] <<  0;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] <<  8;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 16;

    i += 1;
    j += rc4_key->S[i];
    swap (rc4_key, i, j);
    idx = rc4_key->S[i] + rc4_key->S[j];
    xor4 |= (u32) rc4_key->S[idx] << 24;

    out[k] = in[k] ^ xor4;
  }

  return j;
}
KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
{
/**
@ -210,13 +73,22 @@ KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
const u32 pw_len = pws[gid].pw_len;
const u32 padding[8] =
{
0x5e4ebf28,
0x418a754e,
0x564e0064,
0x0801faff,
0xb6002e2e,
0x803e68d0,
0xfea90c2f,
0x7a695364
};
/**
* shared
*/
//LOCAL_AS RC4_KEY rc4_keys[64];
//LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
u32 P = esalt_bufs[DIGESTS_OFFSET].P;
u32 id_buf[12];
@ -327,9 +199,7 @@ KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
* shared
*/
LOCAL_VK RC4_KEY rc4_keys[64];
LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid];
LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE];
/**
* loop
@ -398,9 +268,9 @@ KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t))
tmp[2] = digest[2] ^ xv;
tmp[3] = digest[3] ^ xv;
rc4_init_16 (rc4_key, tmp);
rc4_init_128 (S, tmp);
rc4_next_16 (rc4_key, 0, 0, out, out);
rc4_next_16 (S, 0, 0, out, out);
}
}

@ -30,7 +30,7 @@
- AES Crypt Plugin: Reduced max password length from 256 to 128 which improved performance by 22%
- RAR3-p (Compressed): Fix workaround in unrar library in AES constant table generation to enable multi-threading support
- CRC32: Prevent decompression of data used in CRC32 calculation on host. The decompression led to false negatives with TrueCrypt/VeraCrypt keyfiles
- RC4: Updated hash-mode 7500, 9710, 9720, 10400 and 10410 to new RC4 crypto library code, improving performance by 20% or more
- RC4 Kernels: Improved performance by 20%+ for hash-modes Kerberos 5 (etype 23), MS Office (<= 2003) and PDF (<= 1.6) by using new RC4 code
##
## Technical
@ -40,7 +40,7 @@
- Dependencies: Updated xxHash from 0.1.0 to v0.8.0 - Stable XXH3
- Documentation: Update missing documentation in plugin developer guide for OPTS_TYPE_MP_MULTI_DISABLE and OPTS_TYPE_NATIVE_THREADS
- Kernels: Add standalone true UTF8 to UTF16 converter kernel that runs after amplifier. Use OPTS_TYPE_POST_AMP_UTF16LE from plugin
- Kernels: Add RC4 cipher to crypto library with shared memory access pattern which is not causing any bank conflicts
- Kernels: Add RC4 cipher to crypto library with an optimized shared memory access pattern which will not cause any bank conflicts if -u <= 32
- Modules: Recategorized HASH_CATEGORY option in various modules
* changes v6.2.0 -> v6.2.1

@ -59,25 +59,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$";
static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3";
static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4";
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u64 esalt_size = (const u64) sizeof (oldoffice34_t);
char *jit_build_options = NULL;
return esalt_size;
}
u32 native_threads = 0;
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 64; // RC4
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
native_threads = 1;
}
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
native_threads = 8;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
native_threads = 64;
}
else
{
native_threads = 32;
}
}
return kernel_threads_min;
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
return jit_build_options;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 64; // RC4
const u64 esalt_size = (const u64) sizeof (oldoffice34_t);
return kernel_threads_max;
return esalt_size;
}
u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -314,14 +331,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -58,25 +58,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$";
static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3";
static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4";
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u64 esalt_size = (const u64) sizeof (oldoffice34_t);
char *jit_build_options = NULL;
return esalt_size;
}
u32 native_threads = 0;
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 64; // RC4
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
native_threads = 1;
}
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
native_threads = 8;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
native_threads = 64;
}
else
{
native_threads = 32;
}
}
return kernel_threads_min;
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
return jit_build_options;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 64; // RC4
const u64 esalt_size = (const u64) sizeof (oldoffice34_t);
return kernel_threads_max;
return esalt_size;
}
u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -332,14 +349,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -60,6 +60,37 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$";
static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3";
//static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4";
/**
 * Builds the extra JIT compiler options for this hash-mode's kernel.
 *
 * Picks a per-device work-group size and passes it to the kernel as
 * FIXED_LOCAL_SIZE, together with the _unroll define:
 *   CPU               -> 1
 *   GPU, Intel SDK    -> 8
 *   GPU, AMD          -> 64
 *   GPU, other vendor -> 32
 *   any other device  -> 1 (fallback)
 *
 * Returns the option string allocated by hc_asprintf(); the caller owns it.
 */
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
  char *jit_build_options = NULL;

  // Fallback for device types that are neither CPU nor GPU. It must not be 0:
  // the RC4 kernels size their local state as `u32 S[64 * FIXED_LOCAL_SIZE]`,
  // so 0 would declare an empty array. 1 is the smallest valid size.
  // NOTE(review): confirm 1 is the preferred value for accelerator devices.
  u32 native_threads = 1;

  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
  {
    native_threads = 1;
  }
  else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
  {
    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
    {
      native_threads = 8;
    }
    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
    {
      native_threads = 64;
    }
    else
    {
      native_threads = 32;
    }
  }

  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);

  return jit_build_options;
}
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 esalt_size = (const u64) sizeof (oldoffice34_t);
@ -340,7 +371,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

@ -92,24 +92,30 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
{
char *jit_build_options = NULL;
// Extra treatment for Apple systems
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
{
return jit_build_options;
}
u32 native_threads = 0;
// Intel CPU
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU))
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
hc_asprintf (&jit_build_options, "-D _unroll");
native_threads = 1;
}
// ROCM
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
hc_asprintf (&jit_build_options, "-D _unroll");
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
native_threads = 8;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
native_threads = 64;
}
else
{
native_threads = 32;
}
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
return jit_build_options;
}
@ -127,20 +133,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Reports the lower bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64; // RC4
}
// Reports the upper bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64; // RC4
}
u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 pw_max = 32; // https://www.pdflib.com/knowledge-base/pdf-password-security/encryption/
@ -505,8 +497,8 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -52,48 +52,51 @@ typedef struct krb5tgs
// Literal "$krb5tgs$23$" prefix. NOTE(review): presumably compared against the start of
// hash lines during parsing -- its use is not visible in this span, confirm.
static const char *SIGNATURE_KRB5TGS = "$krb5tgs$23$";
// Reports the byte size of krb5tgs_t as this module's esalt size.
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // The arguments are not consulted; the size comes straight from sizeof.
  return (u64) sizeof (krb5tgs_t);
}
// Reports the lower bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64;
}
// Reports the upper bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64;
}
/*
 * Builds the JIT compiler option string for this module's kernels.
 *
 * native_threads is selected from the device type and vendor and passed to the
 * kernel build as -D FIXED_LOCAL_SIZE=<n>, together with -D _unroll:
 *   - CPU device:                      1
 *   - GPU, Intel SDK vendor:           8
 *   - GPU, AMD vendor:                 32 if device_local_mem_size < 49152, else 64
 *   - any other GPU:                   32
 *   - neither CPU nor GPU:             0 (initial value is kept)
 *
 * Only device_param is read; the remaining parameters are unused.
 *
 * Returns the string written by hc_asprintf.
 * NOTE(review): presumably heap-allocated and released by the caller -- confirm.
 */
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
  char *jit_build_options = NULL;

  u32 native_threads = 0;

  // The selection depends on the device alone. It is deliberately not nested under
  // kernel-type or attack-mode checks, which would leave native_threads at 0 for
  // the cases those checks exclude.
  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
  {
    native_threads = 1;
  }
  else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
  {
    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
    {
      native_threads = 8;
    }
    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
    {
      // Smaller local memory: halve the fixed local size.
      if (device_param->device_local_mem_size < 49152)
      {
        native_threads = 32;
      }
      else
      {
        native_threads = 64;
      }
    }
    else
    {
      native_threads = 32;
    }
  }

  // Format the option string exactly once, so no previously stored string is
  // overwritten (and lost) by a second hc_asprintf call on the same variable.
  hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);

  return jit_build_options;
}
// Reports the byte size of krb5tgs_t as this module's esalt size.
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // The arguments are not consulted; the size comes straight from sizeof.
  return (u64) sizeof (krb5tgs_t);
}
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
// amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy)
@ -308,14 +311,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -56,20 +56,37 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
{
// Produces the JIT build-option string: native_threads is chosen from the device
// type and vendor and formatted into -D FIXED_LOCAL_SIZE=<n> together with -D _unroll.
// NOTE(review): this span reads like a diff hunk whose +/- markers were stripped, so
// lines of the previous and the new implementation appear next to each other (see the
// two consecutive `if` headers below). Confirm against the real module source before
// relying on the exact control flow.
char *jit_build_options = NULL;
// in pure -a 0 mode we reserve pws_t with 64 threads = 256 + 4 bytes = 16640.
// the RC4_KEY with 64 threads requires (256 + 4) 16640.
// Stays 0 whenever none of the assignments below is reached
u32 native_threads = 0;
if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
native_threads = 1;
}
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// GPU thread count by vendor: Intel SDK 8, AMD 32 or 64 by local memory size, else 32
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
native_threads = 8;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
if (device_param->device_local_mem_size < 49152)
{
// NOTE(review): this string is written to the same variable that the final
// hc_asprintf call below writes again, so it does not appear in the returned
// options (assuming hc_asprintf stores a new string through its first
// argument) -- looks like a leftover line, confirm.
hc_asprintf (&jit_build_options, "-D FORCE_DISABLE_SHM");
native_threads = 32;
}
else
{
native_threads = 64;
}
}
else
{
native_threads = 32;
}
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
return jit_build_options;
}
@ -80,20 +97,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
return esalt_size;
}
// Reports the lower bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64;
}
// Reports the upper bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64;
}
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
// amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy)
@ -276,8 +279,8 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -95,24 +95,37 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
{
// Produces the JIT build-option string: native_threads is chosen from the device
// type and vendor and formatted into -D FIXED_LOCAL_SIZE=<n> together with -D _unroll.
// NOTE(review): this span reads like a diff hunk whose +/- markers were stripped, so
// lines of the previous and the new implementation appear next to each other (see the
// paired `if` headers and the `if` directly followed by `else if` below). Confirm
// against the real module source before relying on the exact control flow.
char *jit_build_options = NULL;
// Extra treatment for Apple systems: return early with jit_build_options still NULL
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
{
return jit_build_options;
}
// Stays 0 when the device is neither CPU nor GPU
u32 native_threads = 0;
// Intel CPU
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU))
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
hc_asprintf (&jit_build_options, "-D _unroll");
native_threads = 1;
}
// ROCM
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
hc_asprintf (&jit_build_options, "-D _unroll");
// GPU thread count by vendor: Intel SDK 8, AMD 32 or 64 by local memory size, else 32
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
native_threads = 8;
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
// AMD devices reporting less than 49152 of local memory get the smaller value
if (device_param->device_local_mem_size < 49152)
{
native_threads = 32;
}
else
{
native_threads = 64;
}
}
else
{
native_threads = 32;
}
}
// Final write to jit_build_options; the earlier "-D _unroll"-only calls above target
// the same variable (assuming hc_asprintf stores a new string through its first
// argument -- TODO confirm).
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
return jit_build_options;
}
@ -130,20 +143,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Reports the lower bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64; // RC4
}
// Reports the upper bound for this module's kernel thread count: always 64.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // None of the arguments are consulted; the bound is a constant.
  return 64; // RC4
}
u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 pw_max = 32; // https://www.pdflib.com/knowledge-base/pdf-password-security/encryption/
@ -508,8 +507,8 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

Loading…
Cancel
Save