From 5c6b3fa7ab5c8d2baf637514286ad9388e700f74 Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 7 Aug 2017 15:22:18 +0200 Subject: [PATCH] Add pure kernels for Kerberos 5 TGS-REP etype 23 --- OpenCL/m13100_a0.cl | 516 +++++++++++++++++++++++++++++++++++++++++++ OpenCL/m13100_a1.cl | 492 +++++++++++++++++++++++++++++++++++++++++ OpenCL/m13100_a3.cl | 526 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1534 insertions(+) create mode 100644 OpenCL/m13100_a0.cl create mode 100644 OpenCL/m13100_a1.cl create mode 100644 OpenCL/m13100_a3.cl diff --git a/OpenCL/m13100_a0.cl b/OpenCL/m13100_a0.cl new file mode 100644 index 000000000..d01619666 --- /dev/null +++ b/OpenCL/m13100_a0.cl @@ -0,0 +1,516 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//shared mem too small +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_hash_md4.cl" +#include "inc_hash_md5.cl" + +typedef struct +{ + u8 S[256]; + + u32 wtf_its_faster; + +} RC4_KEY; + +void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = rc4_key->S[i]; + rc4_key->S[i] = rc4_key->S[j]; + rc4_key->S[j] = tmp; +} + +void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) +{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + __local u32 *ptr = (__local u32 *) rc4_key->S; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 64; i++) + { + *ptr++ = v; v += a; + } + + u32 j = 0; + + for (u32 i = 0; i < 16; i++) + { + u32 idx = i * 16; + + u32 v; + + v = data[0]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[1]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[2]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[3]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + } +} + +u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global const u32 in[4], u32 out[4]) +{ + #ifdef _unroll + #pragma unroll + #endif + for (u32 k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u8 idx; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 0; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 8; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 16; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 24; + + out[k] = in[k] ^ xor4; + } + + return j; +} + +int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], __global const u32 *edata2, const u32 edata2_len, const u32 K2[4], const u32 checksum[4]) +{ + rc4_init_16 (rc4_key, data); + + u32 out0[4]; + u32 out1[4]; + + u8 i = 0; + u8 j = 0; + + /* + 8 first bytes are nonce, then ASN1 structs (DER encoding: type-length-data) + + if length >= 128 bytes: + length is on 2 bytes and type is \x63\x82 (encode_krb5_enc_tkt_part) and data is an ASN1 sequence \x30\x82 + else: + length is on 1 byte and type is \x63\x81 and data is an ASN1 sequence \x30\x81 + + next headers follow the same ASN1 "type-length-data" scheme + */ + + j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + + if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; + + j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + + if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; + + rc4_init_16 (rc4_key, data); + + i = 0; + j = 0; + + // init hmac + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K2[0]; + w0[1] = K2[1]; + w0[2] = K2[2]; + w0[3] = K2[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + int edata2_left; + + for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); + } + + w0[0] = 0; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + if (edata2_left < 16) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + + truncate_block_4x4_le (w0, edata2_left & 0xf); + } + else if (edata2_left < 32) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + + truncate_block_4x4_le (w1, edata2_left & 0xf); + } + else if (edata2_left < 48) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + + truncate_block_4x4_le (w2, edata2_left & 0xf); + } + else + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + truncate_block_4x4_le (w3, edata2_left & 0xf); + } + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, edata2_left); + + md5_hmac_final (&ctx); + + if (checksum[0] != ctx.opad.h[0]) return 0; + if (checksum[1] != ctx.opad.h[1]) return 0; + if (checksum[2] != ctx.opad.h[2]) return 0; + if (checksum[3] != ctx.opad.h[3]) return 0; + + return 1; +} + +void kerb_prepare (const u32 K[4], const u32 checksum[4], u32 digest[4], u32 K2[4]) +{ + // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K[0]; + w0[1] = K[1]; + w0[2] = K[2]; + w0[3] = K[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx1; + + md5_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx1, w0, w1, w2, w3, 4); + + md5_hmac_final (&ctx1); + + w0[0] = ctx1.opad.h[0]; + w0[1] = ctx1.opad.h[1]; + w0[2] = ctx1.opad.h[2]; + w0[3] = ctx1.opad.h[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + w0[0] = checksum[0]; + w0[1] = checksum[1]; + w0[2] = checksum[2]; + w0[3] = checksum[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 16); + + md5_hmac_final (&ctx); + + digest[0] = ctx.opad.h[0]; + digest[1] = ctx.opad.h[1]; + digest[2] = ctx.opad.h[2]; + digest[3] = ctx.opad.h[3]; + + K2[0] = ctx1.opad.h[0]; + K2[1] = ctx1.opad.h[1]; + K2[2] = ctx1.opad.h[2]; + K2[3] = ctx1.opad.h[3]; +} + +__kernel void m13100_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + const u32 pw_lenv = ceil ((float) pw_len / 4); + + u32 w[64] = { 0 }; + + for (int idx = 0; idx < pw_lenv; idx++) + { + w[idx] = pws[gid].i[idx]; + + barrier (CLK_GLOBAL_MEM_FENCE); + } + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + // todo: add rules engine + + md4_ctx_t ctx; + + md4_init (&ctx); + + md4_update_utf16le (&ctx, w, pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +} + +__kernel void m13100_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + const u32 pw_lenv = ceil ((float) pw_len / 4); + + u32 w[64] = { 0 }; + + for (int idx = 0; idx < pw_lenv; idx++) + { + w[idx] = pws[gid].i[idx]; + + barrier (CLK_GLOBAL_MEM_FENCE); + } + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + // todo: add rules engine + + md4_ctx_t ctx; + + md4_init (&ctx); + + md4_update_utf16le (&ctx, w, pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +} diff --git a/OpenCL/m13100_a1.cl b/OpenCL/m13100_a1.cl new file mode 100644 index 000000000..38b1ed643 --- /dev/null +++ b/OpenCL/m13100_a1.cl @@ -0,0 +1,492 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//shared mem too small +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_hash_md4.cl" +#include "inc_hash_md5.cl" + +typedef struct +{ + u8 S[256]; + + u32 wtf_its_faster; + +} RC4_KEY; + +void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = rc4_key->S[i]; + rc4_key->S[i] = rc4_key->S[j]; + rc4_key->S[j] = tmp; +} + +void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) +{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + __local u32 *ptr = (__local u32 *) rc4_key->S; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 64; i++) + { + *ptr++ = v; v += a; + } + + u32 j = 0; + + for (u32 i = 0; i < 16; i++) + { + u32 idx = i * 16; + + u32 v; + + v = data[0]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[1]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[2]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[3]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + } +} + +u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global const u32 in[4], u32 out[4]) +{ + #ifdef _unroll + #pragma unroll + #endif + for (u32 k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u8 idx; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 0; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 8; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 16; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 24; + + out[k] = in[k] ^ xor4; + } + + return j; +} + +int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], __global const u32 *edata2, const u32 edata2_len, const u32 K2[4], const u32 checksum[4]) +{ + rc4_init_16 (rc4_key, data); + + u32 out0[4]; + u32 out1[4]; + + u8 i = 0; + u8 j = 0; + + /* + 8 first bytes are nonce, then ASN1 structs (DER encoding: type-length-data) + + if length >= 128 bytes: + length is on 2 bytes and type is \x63\x82 (encode_krb5_enc_tkt_part) and data is an ASN1 sequence \x30\x82 + else: + length is on 1 byte and type is \x63\x81 and data is an ASN1 sequence \x30\x81 + + next headers follow the same ASN1 "type-length-data" scheme + */ + + j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + + if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; + + j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + + if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; + + rc4_init_16 (rc4_key, data); + + i = 0; + j = 0; + + // init hmac + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K2[0]; + w0[1] = K2[1]; + w0[2] = K2[2]; + w0[3] = K2[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + int edata2_left; + + for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); + } + + w0[0] = 0; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + if (edata2_left < 16) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + + truncate_block_4x4_le (w0, edata2_left & 0xf); + } + else if (edata2_left < 32) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + + truncate_block_4x4_le (w1, edata2_left & 0xf); + } + else if (edata2_left < 48) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + + truncate_block_4x4_le (w2, edata2_left & 0xf); + } + else + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + truncate_block_4x4_le (w3, edata2_left & 0xf); + } + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, edata2_left); + + md5_hmac_final (&ctx); + + if (checksum[0] != ctx.opad.h[0]) return 0; + if (checksum[1] != ctx.opad.h[1]) return 0; + if (checksum[2] != ctx.opad.h[2]) return 0; + if (checksum[3] != ctx.opad.h[3]) return 0; + + return 1; +} + +void kerb_prepare (const u32 K[4], const u32 checksum[4], u32 digest[4], u32 K2[4]) +{ + // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K[0]; + w0[1] = K[1]; + w0[2] = K[2]; + w0[3] = K[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx1; + + md5_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx1, w0, w1, w2, w3, 4); + + md5_hmac_final (&ctx1); + + w0[0] = ctx1.opad.h[0]; + w0[1] = ctx1.opad.h[1]; + w0[2] = ctx1.opad.h[2]; + w0[3] = ctx1.opad.h[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + w0[0] = checksum[0]; + w0[1] = checksum[1]; + w0[2] = checksum[2]; + w0[3] = checksum[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 16); + + md5_hmac_final (&ctx); + + digest[0] = ctx.opad.h[0]; + digest[1] = ctx.opad.h[1]; + digest[2] = ctx.opad.h[2]; + digest[3] = ctx.opad.h[3]; + + K2[0] = ctx1.opad.h[0]; + K2[1] = ctx1.opad.h[1]; + K2[2] = ctx1.opad.h[2]; + K2[3] = ctx1.opad.h[3]; +} + +__kernel void m13100_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + md4_ctx_t ctx0; + + md4_init (&ctx0); + + md4_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md4_ctx_t ctx = ctx0; + + md4_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +} + +__kernel void m13100_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + md4_ctx_t ctx0; + + md4_init (&ctx0); + + md4_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md4_ctx_t ctx = ctx0; + + md4_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +} diff --git a/OpenCL/m13100_a3.cl b/OpenCL/m13100_a3.cl new file mode 100644 index 000000000..7b1ac5a05 --- /dev/null +++ b/OpenCL/m13100_a3.cl @@ -0,0 +1,526 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//shared mem too small +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_hash_md4.cl" +#include "inc_hash_md5.cl" + +typedef struct +{ + u8 S[256]; + + u32 wtf_its_faster; + +} RC4_KEY; + +void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = rc4_key->S[i]; + rc4_key->S[i] = rc4_key->S[j]; + rc4_key->S[j] = tmp; +} + +void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4]) +{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + __local u32 *ptr = (__local u32 *) rc4_key->S; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 64; i++) + { + *ptr++ = v; v += a; + } + + u32 j = 0; + + for (u32 i = 0; i < 16; i++) + { + u32 idx = i * 16; + + u32 v; + + v = data[0]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[1]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[2]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + + v = data[3]; + + j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; + j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; + } +} + +u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __global const u32 in[4], u32 out[4]) +{ + #ifdef _unroll + #pragma unroll + #endif + for (u32 k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u8 idx; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 0; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 8; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 16; + + i += 1; + j += rc4_key->S[i]; + + swap (rc4_key, i, j); + + idx = rc4_key->S[i] + rc4_key->S[j]; + + xor4 |= rc4_key->S[idx] << 24; + + out[k] = in[k] ^ xor4; + } + + return j; +} + +int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], __global const u32 *edata2, const u32 edata2_len, const u32 K2[4], const u32 checksum[4]) +{ + rc4_init_16 (rc4_key, data); + + u32 out0[4]; + u32 out1[4]; + + u8 i = 0; + u8 j = 0; + + /* + 8 first bytes are nonce, then ASN1 structs (DER encoding: type-length-data) + + if length >= 128 bytes: + length is on 2 bytes and type is \x63\x82 (encode_krb5_enc_tkt_part) and data is an ASN1 sequence \x30\x82 + else: + length is on 1 byte and type is \x63\x81 and data is an ASN1 sequence \x30\x81 + + next headers follow the same ASN1 "type-length-data" scheme + */ + + j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + + if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; + + j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + + if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; + + rc4_init_16 (rc4_key, data); + + i = 0; + j = 0; + + // init hmac + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K2[0]; + w0[1] = K2[1]; + w0[2] = K2[2]; + w0[3] = K2[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + int edata2_left; + + for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); + } + + w0[0] = 0; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + if (edata2_left < 16) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + + truncate_block_4x4_le (w0, edata2_left & 0xf); + } + else if (edata2_left < 32) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + + truncate_block_4x4_le (w1, edata2_left & 0xf); + } + else if (edata2_left < 48) + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + + truncate_block_4x4_le (w2, edata2_left & 0xf); + } + else + { + j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + + truncate_block_4x4_le (w3, edata2_left & 0xf); + } + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, edata2_left); + + md5_hmac_final (&ctx); + + if (checksum[0] != ctx.opad.h[0]) return 0; + if (checksum[1] != ctx.opad.h[1]) return 0; + if (checksum[2] != ctx.opad.h[2]) return 0; + if (checksum[3] != ctx.opad.h[3]) return 0; + + return 1; +} + +void kerb_prepare (const u32 K[4], const u32 checksum[4], u32 digest[4], u32 K2[4]) +{ + // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = K[0]; + w0[1] = K[1]; + w0[2] = K[2]; + w0[3] = K[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx1; + + md5_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx1, w0, w1, w2, w3, 4); + + md5_hmac_final (&ctx1); + + w0[0] = ctx1.opad.h[0]; + w0[1] = ctx1.opad.h[1]; + w0[2] = ctx1.opad.h[2]; + w0[3] = ctx1.opad.h[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_ctx_t ctx; + + md5_hmac_init_64 (&ctx, w0, w1, w2, w3); + + w0[0] = checksum[0]; + w0[1] = checksum[1]; + w0[2] = checksum[2]; + w0[3] = checksum[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 16); + + md5_hmac_final (&ctx); + + digest[0] = ctx.opad.h[0]; + digest[1] = ctx.opad.h[1]; + digest[2] = ctx.opad.h[2]; + digest[3] = ctx.opad.h[3]; + + K2[0] = ctx1.opad.h[0]; + K2[1] = ctx1.opad.h[1]; + K2[2] = ctx1.opad.h[2]; + K2[3] = ctx1.opad.h[3]; +} + +__kernel void m13100_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + const u32 pw_lenv = ceil ((float) pw_len / 4); + + u32x w[64] = { 0 }; + + for (int idx = 0; idx < pw_lenv; idx++) + { + w[idx] = pws[gid].i[idx]; + + barrier (CLK_GLOBAL_MEM_FENCE); + } + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md4_ctx_t ctx; + + md4_init (&ctx); + + md4_update_utf16le (&ctx, w, pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +} + +__kernel void m13100_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +{ + /** + * modifier + */ + + const u32 lid = get_local_id (0); + const u32 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + const u32 pw_lenv = ceil ((float) pw_len / 4); + + u32x w[64] = { 0 }; + + for (int idx = 0; idx < pw_lenv; idx++) + { + w[idx] = pws[gid].i[idx]; + + barrier (CLK_GLOBAL_MEM_FENCE); + } + + __local RC4_KEY rc4_keys[64]; + + u32 checksum[4]; + + checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; + checksum[1] = krb5tgs_bufs[digests_offset].checksum[1]; + checksum[2] = krb5tgs_bufs[digests_offset].checksum[2]; + checksum[3] = krb5tgs_bufs[digests_offset].checksum[3]; + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md4_ctx_t ctx; + + md4_init (&ctx); + + md4_update_utf16le (&ctx, w, pw_len); + + md4_final (&ctx); + + u32 digest[4]; + + u32 K2[4]; + + kerb_prepare (ctx.h, checksum, digest, K2); + + if (decrypt_and_check (&rc4_keys[lid], digest, krb5tgs_bufs[digests_offset].edata2, krb5tgs_bufs[digests_offset].edata2_len, K2, checksum) == 1) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } +}