From f573c1d96d6ee3fecd6baebded4aa2a58d0e7e74 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 3 Dec 2017 14:35:39 +0100 Subject: [PATCH] Add optimized -m 16100 kernels --- OpenCL/m16100_a0-optimized.cl | 585 +++++++++++++++++++++++++ OpenCL/m16100_a1-optimized.cl | 705 ++++++++++++++++++++++++++++++ OpenCL/m16100_a3-optimized.cl | 801 ++++++++++++++++++++++++++++++++++ src/interface.c | 5 +- 4 files changed, 2095 insertions(+), 1 deletion(-) create mode 100644 OpenCL/m16100_a0-optimized.cl create mode 100644 OpenCL/m16100_a1-optimized.cl create mode 100644 OpenCL/m16100_a3-optimized.cl diff --git a/OpenCL/m16100_a0-optimized.cl b/OpenCL/m16100_a0-optimized.cl new file mode 100644 index 000000000..ad9968811 --- /dev/null +++ b/OpenCL/m16100_a0-optimized.cl @@ -0,0 +1,585 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" + +__kernel void m16100_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_m04: for every rule position il_pos, builds one 64-byte MD5 block from session_buf, the rule-processed word of pws[gid] and sequence_buf, XORs the first two digest words with ct_data_buf[0..1] and calls mark_hash when the resulting bytes pass the per-sequence field checks below. */ + /** + * modifier + */ + + const u64 lid = get_local_id (0); /* NOTE(review): lid is never read in this kernel */ + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items with gid >= gid_max exit before reading pws */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt + */ + + const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; + const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + + /** + * digest + */ + + const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + + u32 ct_buf[2]; + + ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); /* NOTE(review): presumably fills w0/w1 with the rule-mutated candidate and returns its byte length - confirm in inc_rp_optimized.cl */ + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = sequence_buf | 0x00800000; /* sequence_buf with a 0x80 byte ORed in at bits 16..23; assumes sequence_buf only uses its low 16 bits so 0x80 acts as the MD5 padding marker - TODO confirm */ + s0[1] = 0; + s0[2] = 0; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, 4 + out_len); /* NOTE(review): presumably moves the bytes set above to byte offset 4 + out_len, i.e. behind the session word and the candidate - confirm in inc_common.cl */ + + const u32x pw_salt_len = 4 + out_len + 2; + + s0[0] = session_buf; + s0[1] |= w0[0]; + s0[2] |= w0[1]; + s0[3] |= w0[2]; + s1[0] |= w0[3]; + s1[1] |= w1[0]; + s1[2] |= w1[1]; + s1[3] |= w1[2]; + s2[0] |= w1[3]; + s2[1] |= 0; + s2[2] |= 0; + s2[3] |= 0; + s3[0] |= 0; + s3[1] |= 0; + s3[2] = 
pw_salt_len * 8; /* word 14 of the block: message length in bits */ + s3[3] = 0; + + w0[0] = s0[0]; + w0[1] = s0[1]; + w0[2] = s0[2]; + w0[3] = s0[3]; + w1[0] = s1[0]; + w1[1] = s1[1]; + w1[2] = s1[2]; + w1[3] = s1[3]; + w2[0] = s2[0]; + w2[1] = s2[1]; + w2[2] = s2[2]; + w2[3] = s2[3]; + w3[0] = s3[0]; + w3[1] = s3[1]; + w3[2] = s3[2]; + w3[3] = s3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, 
b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, 
MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + u32 test[2]; + + test[0] = a ^ ct_buf[0]; + test[1] = b ^ ct_buf[1]; /* test[] = first two digest words XOR first two ciphertext words */ + + if (sequence_buf == 0x01c0) /* accepts when action is 1, 2 or 4, authen_type is 1..6, authen_service is <= 9 and 8 + the four length bytes equals ct_len; NOTE(review): u32 is presumably unsigned, which would make authen_service >= 0x00 always true */ + { + const u32 action = ((test[0] >> 0) & 0xff); + // can have more than predefined ones + // const u32 priv_lvl = ((test[0] >> 8) & 0xff); + const u32 authen_type = ((test[0] >> 16) & 0xff); + const u32 authen_service = ((test[0] >> 24) & 0xff); + const u32 user_len = ((test[1] >> 0) & 0xff); + const u32 port_len = ((test[1] >> 8) & 0xff); + const u32 rem_addr_len = ((test[1] >> 16) & 0xff); + const u32 data_len = ((test[1] >> 24) & 0xff); + + if (((action == 0x01) || (action == 0x02) || (action == 0x04)) + && ((authen_type >= 0x01) && (authen_type <= 0x06)) + && ((authen_service >= 0x00) && (authen_service <= 0x09)) + && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) /* report only when the counter was still 0 before this increment */ + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0)) /* accepts when 5 + msg_len equals ct_len, data_len is 0 and flags is 0; both lengths are assembled high byte first */ + { + const u32 msg_len = ((test[0] >> 0) & 0xff) << 8 + | ((test[0] >> 8) & 0xff) << 0; + const u32 data_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 flags = ((test[1] >> 0) & 0xff); + + if (((5 + msg_len) == ct_len) + && (data_len == 0) + && (flags == 0)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else /* any other sequence_buf: accepts when status is 1..7 or 0x21, flags is 0 or 1 and 6 + msg_len + data_len equals ct_len */ + { + const u32 status = ((test[0] >> 0) & 0xff); + const u32 flags = ((test[0] >> 8) & 0xff); + const u32 msg_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 data_len = ((test[1] >> 0) & 0xff) << 8 + | ((test[1] >> 8) & 0xff) << 0; + + if (((status >= 0x01 && status <= 0x07) || status == 0x21) + && (flags == 0x01 || flags == 0x00) + && (6 + msg_len + data_len == ct_len)) 
+ { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + } +} + +__kernel void m16100_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_m08: empty body, this entry point does no work */ +} + +__kernel void m16100_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 
*d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_m16: empty body, this entry point does no work */ +} + +__kernel void m16100_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_s04: NOTE(review): this body appears to be statement-for-statement identical to m16100_m04 in this file (same block layout, same MD5 rounds, same field checks) */ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt + */ + + const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; + const u32 sequence_buf = 
esalt_bufs[digests_offset].sequence_buf[0]; + + /** + * digest + */ + + const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + + u32 ct_buf[2]; + + ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = sequence_buf | 0x00800000; + s0[1] = 0; + s0[2] = 0; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, 4 + out_len); + + const u32x pw_salt_len = 4 + out_len + 2; + + s0[0] = session_buf; + s0[1] |= w0[0]; + s0[2] |= w0[1]; + s0[3] |= w0[2]; + s1[0] |= w0[3]; + s1[1] |= w1[0]; + s1[2] |= w1[1]; + s1[3] |= w1[2]; + s2[0] |= w1[3]; + s2[1] |= 0; + s2[2] |= 0; + s2[3] |= 0; + s3[0] |= 0; + s3[1] |= 0; + s3[2] = pw_salt_len * 8; + s3[3] = 0; + + w0[0] = s0[0]; + w0[1] = s0[1]; + w0[2] = s0[2]; + w0[3] = s0[3]; + w1[0] = s1[0]; + w1[1] = s1[1]; + w1[2] = s1[2]; + w1[3] = s1[3]; + w2[0] = s2[0]; + w2[1] = s2[1]; + w2[2] = s2[2]; + w2[3] = s2[3]; + w3[0] = s3[0]; + w3[1] = s3[1]; + w3[2] = s3[2]; + w3[3] = s3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], 
MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + 
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + u32 test[2]; + + test[0] = a ^ ct_buf[0]; + test[1] = b ^ ct_buf[1]; + + if (sequence_buf == 0x01c0) + { + const u32 action = ((test[0] >> 0) & 0xff); + // can have more than predefined ones + // const u32 priv_lvl = ((test[0] >> 8) & 0xff); + const u32 authen_type = ((test[0] >> 16) & 0xff); + const u32 authen_service = ((test[0] >> 24) & 0xff); + const u32 user_len = ((test[1] >> 0) & 0xff); + const u32 port_len = ((test[1] >> 8) & 0xff); + const u32 rem_addr_len = ((test[1] >> 16) & 0xff); + const u32 data_len = ((test[1] >> 24) & 0xff); + + if (((action == 0x01) || (action == 0x02) || (action == 0x04)) + && ((authen_type >= 0x01) && (authen_type <= 0x06)) + && ((authen_service >= 0x00) && 
(authen_service <= 0x09)) + && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0)) + { + const u32 msg_len = ((test[0] >> 0) & 0xff) << 8 + | ((test[0] >> 8) & 0xff) << 0; + const u32 data_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 flags = ((test[1] >> 0) & 0xff); + + if (((5 + msg_len) == ct_len) + && (data_len == 0) + && (flags == 0)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else + { + const u32 status = ((test[0] >> 0) & 0xff); + const u32 flags = ((test[0] >> 8) & 0xff); + const u32 msg_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 data_len = ((test[1] >> 0) & 0xff) << 8 + | ((test[1] >> 8) & 0xff) << 0; + + if (((status >= 0x01 && status <= 0x07) || status == 0x21) + && (flags == 0x01 || flags == 0x00) + && (6 + msg_len + data_len == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + } +} + +__kernel void m16100_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, 
__global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_s08: empty body, this entry point does no work */ +} + +__kernel void m16100_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ /* m16100_s16: empty body, this entry point does no work */ +} diff --git a/OpenCL/m16100_a1-optimized.cl b/OpenCL/m16100_a1-optimized.cl new file mode 100644 index 000000000..96714ee11 --- /dev/null +++ b/OpenCL/m16100_a1-optimized.cl @@ -0,0 +1,705 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" + 
+__kernel void m16100_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; + const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + + /** + * digest + */ + + const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + + u32 ct_buf[2]; + + ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 
VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = sequence_buf | 0x00800000; + s0[1] = 0; + s0[2] = 0; + s0[3] = 0; + 
s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, 4 + pw_len); + + const u32x pw_salt_len = 4 + pw_len + 2; + + s0[0] = session_buf; + s0[1] |= w0[0]; + s0[2] |= w0[1]; + s0[3] |= w0[2]; + s1[0] |= w0[3]; + s1[1] |= w1[0]; + s1[2] |= w1[1]; + s1[3] |= w1[2]; + s2[0] |= w1[3]; + s2[1] |= 0; + s2[2] |= 0; + s2[3] |= 0; + s3[0] |= 0; + s3[1] |= 0; + s3[2] = pw_salt_len * 8; + s3[3] = 0; + + w0[0] = s0[0]; + w0[1] = s0[1]; + w0[2] = s0[2]; + w0[3] = s0[3]; + w1[0] = s1[0]; + w1[1] = s1[1]; + w1[2] = s1[2]; + w1[3] = s1[3]; + w2[0] = s2[0]; + w2[1] = s2[1]; + w2[2] = s2[2]; + w2[3] = s2[3]; + w3[0] = s3[0]; + w3[1] = s3[1]; + w3[2] = s3[2]; + w3[3] = s3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, 
b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], 
MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + u32 test[2]; + + test[0] = a ^ ct_buf[0]; + test[1] = b ^ ct_buf[1]; + + if (sequence_buf == 0x01c0) + { + const u32 action = ((test[0] >> 0) & 0xff); + // can have more than predefined ones + // const u32 priv_lvl = ((test[0] >> 8) & 0xff); + const u32 authen_type = ((test[0] >> 16) & 0xff); + const u32 authen_service = ((test[0] >> 24) & 0xff); + const u32 user_len = ((test[1] >> 0) & 0xff); + const u32 port_len = ((test[1] >> 8) & 0xff); + const u32 rem_addr_len = ((test[1] >> 16) & 0xff); + const u32 data_len = ((test[1] >> 24) & 0xff); + + if (((action == 0x01) || (action == 0x02) || (action == 0x04)) + && ((authen_type >= 0x01) && (authen_type <= 0x06)) + && ((authen_service >= 0x00) && (authen_service <= 0x09)) + && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0)) + { + const u32 msg_len = ((test[0] >> 0) & 0xff) << 8 + | ((test[0] >> 8) & 0xff) << 0; + const u32 data_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 flags = ((test[1] >> 0) & 0xff); + + if (((5 + msg_len) == ct_len) + && (data_len == 0) + && (flags == 0)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash 
/**
 * m16100_m08: multi-hash entry point, "08" variant.
 *
 * The body is empty: no candidate is read, hashed or reported.
 * NOTE(review): presumably kept only so the host can resolve the kernel
 * name for this hash mode - confirm against the host-side dispatcher.
 */
__kernel void m16100_m08 (
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset,
  const u32 combs_mode,
  const u64 gid_max)
{
  /* intentionally left without any statements */
}

/**
 * m16100_m16: multi-hash entry point, "16" variant.
 *
 * The body is empty: no candidate is read, hashed or reported.
 * NOTE(review): presumably kept only so the host can resolve the kernel
 * name for this hash mode - confirm against the host-side dispatcher.
 */
__kernel void m16100_m16 (
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset,
  const u32 combs_mode,
  const u64 gid_max)
{
  /* intentionally left without any statements */
}
/**
 * m16100_s04: single-hash kernel that joins two words and tests the result.
 *
 * For every inner-loop position the base word taken from pws[gid] is joined
 * with the word taken from combs_buf (order selected by combs_mode). The MD5
 * input block is laid out as
 *
 *   session_buf (4 bytes) | candidate | low 2 bytes of sequence_buf | 0x80
 *
 * and hashed as a single 64-byte block. The first two digest words are XORed
 * with the first two ciphertext words of the esalt and the resulting eight
 * bytes are checked for plausibility, with the field layout selected by
 * sequence_buf. A plausible result is reported through mark_hash(), at most
 * once per digest (guarded by hashes_shown).
 *
 * Parameters actually read: pws, combs_buf, esalt_bufs, hashes_shown,
 * plains_buf, d_return_buf, salt_pos, il_cnt, digests_cnt, digests_offset,
 * combs_mode, gid_max. All other parameters are unused in this body.
 *
 * NOTE(review): only the first eight merged candidate words are copied into
 * the MD5 block; whatever switch_buffer_by_offset_le_VV() moves into the
 * third/fourth word groups is never appended. Candidates longer than
 * 32 bytes would therefore be hashed truncated - confirm that the host
 * limits the combined length accordingly.
 *
 * NOTE(review): u32x values are stored into u32 variables below, which
 * assumes a scalar build (VECT_SIZE == 1) - TODO confirm.
 */
__kernel void m16100_s04 (
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset,
  const u32 combs_mode,
  const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * salt and ciphertext (constant for the whole inner loop)
   */

  const u32 session_buf  = esalt_bufs[digests_offset].session_buf[0];
  const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0];

  const u32 ct_len = esalt_bufs[digests_offset].ct_data_len;
  const u32 ct0    = esalt_bufs[digests_offset].ct_data_buf[0];
  const u32 ct1    = esalt_bufs[digests_offset].ct_data_buf[1];

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // shift whichever word comes second past the end of the first one
    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    /**
     * trailer: low bytes of sequence_buf followed by the 0x80 padding byte,
     * moved behind the 4-byte session prefix and the candidate
     */

    u32 s0[4] = { 0 };
    u32 s1[4] = { 0 };
    u32 s2[4] = { 0 };
    u32 s3[4] = { 0 };

    s0[0] = sequence_buf | 0x00800000;

    switch_buffer_by_offset_le (s0, s1, s2, s3, 4 + pw_len);

    const u32x pw_salt_len = 4 + pw_len + 2;

    /**
     * assemble the MD5 block: the candidate sits one word behind the session
     * prefix; only the first eight merged words are used (see header note)
     */

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = session_buf;
    w0[1] = s0[1] | wordl0[0] | wordr0[0];
    w0[2] = s0[2] | wordl0[1] | wordr0[1];
    w0[3] = s0[3] | wordl0[2] | wordr0[2];
    w1[0] = s1[0] | wordl0[3] | wordr0[3];
    w1[1] = s1[1] | wordl1[0] | wordr1[0];
    w1[2] = s1[2] | wordl1[1] | wordr1[1];
    w1[3] = s1[3] | wordl1[2] | wordr1[2];
    w2[0] = s2[0] | wordl1[3] | wordr1[3];
    w2[1] = s2[1];
    w2[2] = s2[2];
    w2[3] = s2[3];
    w3[0] = s3[0];
    w3[1] = s3[1];
    w3[2] = pw_salt_len * 8; // message length in bits
    w3[3] = 0;

    /**
     * md5
     */

    u32x a = MD5M_A;
    u32x b = MD5M_B;
    u32x c = MD5M_C;
    u32x d = MD5M_D;

    MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);

    MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);

    // never named below; presumably the scratch variable the MD5_H1/MD5_H2
    // macros expect in scope - confirm in inc_hash_functions.cl
    u32x t;

    MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);

    MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);

    a += MD5M_A;
    b += MD5M_B;
    c += MD5M_C;
    d += MD5M_D;

    /**
     * unmask the first eight ciphertext bytes and check their plausibility
     */

    const u32 pt0 = a ^ ct0;
    const u32 pt1 = b ^ ct1;

    int plausible = 0;

    if (sequence_buf == 0x01c0)
    {
      const u32 action         = (pt0 >>  0) & 0xff;
      // byte 1 (priv_lvl) is not checked: can have more than predefined ones
      const u32 authen_type    = (pt0 >> 16) & 0xff;
      const u32 authen_service = (pt0 >> 24) & 0xff;
      const u32 user_len       = (pt1 >>  0) & 0xff;
      const u32 port_len       = (pt1 >>  8) & 0xff;
      const u32 rem_addr_len   = (pt1 >> 16) & 0xff;
      const u32 data_len       = (pt1 >> 24) & 0xff;

      // authen_service is unsigned, so only its upper bound needs testing
      plausible = ((action == 0x01) || (action == 0x02) || (action == 0x04))
               && ((authen_type >= 0x01) && (authen_type <= 0x06))
               && (authen_service <= 0x09)
               && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len);
    }
    else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0))
    {
      // 16-bit fields are stored high byte first
      const u32 msg_len  = (((pt0 >>  0) & 0xff) << 8) | ((pt0 >>  8) & 0xff);
      const u32 data_len = (((pt0 >> 16) & 0xff) << 8) | ((pt0 >> 24) & 0xff);
      const u32 flags    =   (pt1 >>  0) & 0xff;

      plausible = ((5 + msg_len) == ct_len)
               && (data_len == 0)
               && (flags == 0);
    }
    else
    {
      const u32 status   =   (pt0 >>  0) & 0xff;
      const u32 flags    =   (pt0 >>  8) & 0xff;
      const u32 msg_len  = (((pt0 >> 16) & 0xff) << 8) | ((pt0 >> 24) & 0xff);
      const u32 data_len = (((pt1 >>  0) & 0xff) << 8) | ((pt1 >>  8) & 0xff);

      plausible = (((status >= 0x01) && (status <= 0x07)) || (status == 0x21))
               && ((flags == 0x01) || (flags == 0x00))
               && ((6 + msg_len + data_len) == ct_len);
    }

    if (plausible)
    {
      // only the first work-item to bump the counter reports the hit
      if (atomic_inc (&hashes_shown[digests_offset]) == 0)
      {
        mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos);
      }
    }
  }
}
/**
 * m16100_s08: single-hash entry point, "08" variant.
 *
 * The body is empty: no candidate is read, hashed or reported.
 * NOTE(review): presumably kept only so the host can resolve the kernel
 * name for this hash mode - confirm against the host-side dispatcher.
 */
__kernel void m16100_s08 (
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset,
  const u32 combs_mode,
  const u64 gid_max)
{
  /* intentionally left without any statements */
}
/**
 * m16100_s16: single-hash entry point, "16" variant.
 *
 * The body is empty: no candidate is read, hashed or reported.
 * NOTE(review): presumably kept only so the host can resolve the kernel
 * name for this hash mode - confirm against the host-side dispatcher.
 */
__kernel void m16100_s16 (
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset,
  const u32 combs_mode,
  const u64 gid_max)
{
  /* intentionally left without any statements */
}
/**
 * m16100m: multi-hash worker shared by the mask-attack kernels.
 *
 * w0..w3 hold the candidate prepared by the calling kernel; its first word
 * is completed per inner-loop position with the value returned by
 * ix_create_bft(). The MD5 input block is laid out as
 *
 *   session_buf (4 bytes) | candidate | low 2 bytes of sequence_buf | 0x80
 *
 * and hashed as a single 64-byte block. The first two digest words are XORed
 * with the first two ciphertext words of the esalt and the resulting eight
 * bytes are checked for plausibility, with the field layout selected by
 * sequence_buf. A plausible result is reported through mark_hash(), at most
 * once per digest (guarded by hashes_shown).
 *
 * w0..w3 are only read. Parameters actually used besides them: pw_len,
 * bfs_buf, esalt_bufs, hashes_shown, plains_buf, d_return_buf, salt_pos,
 * il_cnt, digests_cnt, digests_offset.
 *
 * NOTE(review): there is no gid bounds check here; the visible caller
 * performs it before calling - confirm for any other caller.
 * NOTE(review): u32x values are stored into u32 variables below, which
 * assumes a scalar build (VECT_SIZE == 1) - TODO confirm.
 */
void m16100m (
  u32 w0[4],
  u32 w1[4],
  u32 w2[4],
  u32 w3[4],
  const u32 pw_len,
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset)
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  /**
   * digest: length and first two words of the ciphertext
   */

  const u32 ct_len = esalt_bufs[digests_offset].ct_data_len;
  const u32 ct0    = esalt_bufs[digests_offset].ct_data_buf[0];
  const u32 ct1    = esalt_bufs[digests_offset].ct_data_buf[1];

  /**
   * salt
   */

  const u32 session_buf  = esalt_bufs[digests_offset].session_buf[0];
  const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0];

  const u32 pw_salt_len = 4 + pw_len + 2;

  // trailer: low bytes of sequence_buf followed by the 0x80 padding byte,
  // moved behind the 4-byte session prefix and the candidate. It does not
  // depend on il_pos, so it is built once outside the loop.
  u32 s0[4] = { 0 };
  u32 s1[4] = { 0 };
  u32 s2[4] = { 0 };
  u32 s3[4] = { 0 };

  s0[0] = sequence_buf | 0x00800000;

  switch_buffer_by_offset_le (s0, s1, s2, s3, 4 + pw_len);

  /**
   * loop
   */

  const u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    // candidate words sit one word behind the session prefix and are merged
    // with the pre-shifted trailer in the same statement
    u32x t0[4];
    u32x t1[4];
    u32x t2[4];
    u32x t3[4];

    t0[0] = session_buf;
    t0[1] = w0lr  | s0[1];
    t0[2] = w0[1] | s0[2];
    t0[3] = w0[2] | s0[3];
    t1[0] = w0[3] | s1[0];
    t1[1] = w1[0] | s1[1];
    t1[2] = w1[1] | s1[2];
    t1[3] = w1[2] | s1[3];
    t2[0] = w1[3] | s2[0];
    t2[1] = w2[0] | s2[1];
    t2[2] = w2[1] | s2[2];
    t2[3] = w2[2] | s2[3];
    t3[0] = w2[3] | s3[0];
    t3[1] = w3[0] | s3[1];
    t3[2] = pw_salt_len * 8; // message length in bits
    t3[3] = 0;

    /**
     * md5
     */

    u32x a = MD5M_A;
    u32x b = MD5M_B;
    u32x c = MD5M_C;
    u32x d = MD5M_D;

    MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03);
    MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00);
    MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01);
    MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02);
    MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03);

    MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13);
    MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10);
    MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11);
    MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12);
    MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13);

    // never named below; presumably the scratch variable the MD5_H1/MD5_H2
    // macros expect in scope - confirm in inc_hash_functions.cl
    u32x t;

    MD5_STEP (MD5_H1, a, b, c, d, t1[1], MD5C20, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, t2[0], MD5C21, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, t2[3], MD5C22, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, t3[2], MD5C23, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, t0[1], MD5C24, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, t1[0], MD5C25, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, t1[3], MD5C26, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, t2[2], MD5C27, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, t3[1], MD5C28, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, t0[0], MD5C29, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, t0[3], MD5C2a, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, t1[2], MD5C2b, MD5S23);
    MD5_STEP (MD5_H1, a, b, c, d, t2[1], MD5C2c, MD5S20);
    MD5_STEP (MD5_H2, d, a, b, c, t3[0], MD5C2d, MD5S21);
    MD5_STEP (MD5_H1, c, d, a, b, t3[3], MD5C2e, MD5S22);
    MD5_STEP (MD5_H2, b, c, d, a, t0[2], MD5C2f, MD5S23);

    MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33);
    MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30);
    MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31);
    MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32);
    MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33);

    a += MD5M_A;
    b += MD5M_B;
    c += MD5M_C;
    d += MD5M_D;

    /**
     * unmask the first eight ciphertext bytes and check their plausibility
     */

    const u32 pt0 = a ^ ct0;
    const u32 pt1 = b ^ ct1;

    int plausible = 0;

    if (sequence_buf == 0x01c0)
    {
      const u32 action         = (pt0 >>  0) & 0xff;
      // byte 1 (priv_lvl) is not checked: can have more than predefined ones
      const u32 authen_type    = (pt0 >> 16) & 0xff;
      const u32 authen_service = (pt0 >> 24) & 0xff;
      const u32 user_len       = (pt1 >>  0) & 0xff;
      const u32 port_len       = (pt1 >>  8) & 0xff;
      const u32 rem_addr_len   = (pt1 >> 16) & 0xff;
      const u32 data_len       = (pt1 >> 24) & 0xff;

      // authen_service is unsigned, so only its upper bound needs testing
      plausible = ((action == 0x01) || (action == 0x02) || (action == 0x04))
               && ((authen_type >= 0x01) && (authen_type <= 0x06))
               && (authen_service <= 0x09)
               && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len);
    }
    else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0))
    {
      // 16-bit fields are stored high byte first
      const u32 msg_len  = (((pt0 >>  0) & 0xff) << 8) | ((pt0 >>  8) & 0xff);
      const u32 data_len = (((pt0 >> 16) & 0xff) << 8) | ((pt0 >> 24) & 0xff);
      const u32 flags    =   (pt1 >>  0) & 0xff;

      plausible = ((5 + msg_len) == ct_len)
               && (data_len == 0)
               && (flags == 0);
    }
    else
    {
      const u32 status   =   (pt0 >>  0) & 0xff;
      const u32 flags    =   (pt0 >>  8) & 0xff;
      const u32 msg_len  = (((pt0 >> 16) & 0xff) << 8) | ((pt0 >> 24) & 0xff);
      const u32 data_len = (((pt1 >>  0) & 0xff) << 8) | ((pt1 >>  8) & 0xff);

      plausible = (((status >= 0x01) && (status <= 0x07)) || (status == 0x21))
               && ((flags == 0x01) || (flags == 0x00))
               && ((6 + msg_len + data_len) == ct_len);
    }

    if (plausible)
    {
      // only the first work-item to bump the counter reports the hit
      if (atomic_inc (&hashes_shown[digests_offset]) == 0)
      {
        mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos);
      }
    }
  }
}
/**
 * m16100s: single-hash worker for the mask-attack kernels.
 *
 * This mode has no digest comparison that could be specialised for a single
 * target: the original body of this function was a statement-for-statement
 * copy of m16100m (same block layout, same MD5 rounds, same plausibility
 * checks, same reporting). It therefore forwards to m16100m so the logic
 * exists in one place only.
 *
 * Parameters and effects are exactly those of m16100m; w0..w3 are only read.
 */
void m16100s (
  u32 w0[4],
  u32 w1[4],
  u32 w2[4],
  u32 w3[4],
  const u32 pw_len,
  __global pw_t *pws,
  __global const kernel_rule_t *rules_buf,
  __global const pw_t *combs_buf,
  __global const bf_t *bfs_buf,
  __global void *tmps,
  __global void *hooks,
  __global const u32 *bitmaps_buf_s1_a,
  __global const u32 *bitmaps_buf_s1_b,
  __global const u32 *bitmaps_buf_s1_c,
  __global const u32 *bitmaps_buf_s1_d,
  __global const u32 *bitmaps_buf_s2_a,
  __global const u32 *bitmaps_buf_s2_b,
  __global const u32 *bitmaps_buf_s2_c,
  __global const u32 *bitmaps_buf_s2_d,
  __global plain_t *plains_buf,
  __global const digest_t *digests_buf,
  __global u32 *hashes_shown,
  __global const salt_t *salt_bufs,
  __global const tacacs_plus_t *esalt_bufs,
  __global u32 *d_return_buf,
  __global u32 *d_scryptV0_buf,
  __global u32 *d_scryptV1_buf,
  __global u32 *d_scryptV2_buf,
  __global u32 *d_scryptV3_buf,
  const u32 bitmap_mask,
  const u32 bitmap_shift1,
  const u32 bitmap_shift2,
  const u32 salt_pos,
  const u32 loop_pos,
  const u32 loop_cnt,
  const u32 il_cnt,
  const u32 digests_cnt,
  const u32 digests_offset)
{
  m16100m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
(authen_service <= 0x09)) + && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else if ((sequence_buf == 0x03c0) || (sequence_buf == 0x05c0)) + { + const u32 msg_len = ((test[0] >> 0) & 0xff) << 8 + | ((test[0] >> 8) & 0xff) << 0; + const u32 data_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 flags = ((test[1] >> 0) & 0xff); + + if (((5 + msg_len) == ct_len) + && (data_len == 0) + && (flags == 0)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + else + { + const u32 status = ((test[0] >> 0) & 0xff); + const u32 flags = ((test[0] >> 8) & 0xff); + const u32 msg_len = ((test[0] >> 16) & 0xff) << 8 + | ((test[0] >> 24) & 0xff) << 0; + const u32 data_len = ((test[1] >> 0) & 0xff) << 8 + | ((test[1] >> 8) & 0xff) << 0; + + if (((status >= 0x01 && status <= 0x07) || status == 0x21) + && (flags == 0x01 || flags == 0x00) + && (6 + msg_len + data_len == ct_len)) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos); + } + } + } + } +} + +__kernel void m16100_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, 
__global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_m04 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..3] into w0, zero w1, w2 and w3, and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100m (not defined in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m16100_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, 
__global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_m08 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..7] into w0 and w1, zero w2 and w3, and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100m (not defined in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m16100_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 
*bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_m16 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..13] into w0, w1, w2 and w3[0..1], zero w3[2] and w3[3], and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100m (not defined in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m16100_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global 
const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_s04 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..3] into w0, zero w1, w2 and w3, and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100s (defined before this wrapper; its start is not visible in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m16100_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 
*bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_s08 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..7] into w0 and w1, zero w2 and w3, and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100s (defined before this wrapper; its start is not visible in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m16100_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const 
u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const tacacs_plus_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base: m16100_s16 entry point; work-items with gid >= gid_max return immediately, the others copy pws[gid].i[0..13] into w0, w1, w2 and w3[0..1], zero w3[2] and w3[3], and read pw_len from pws[gid].pw_len + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main: call m16100s (defined before this wrapper; its start is not visible in this part of the patch) with w0..w3, pw_len and the kernel arguments up to digests_offset; combs_mode and gid_max are not passed on + */ + + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} diff --git a/src/interface.c b/src/interface.c 
index 761488041..31ab89709 100644 --- a/src/interface.c +++ b/src/interface.c @@ -24658,7 +24658,10 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) hashconfig->kern_type = KERN_TYPE_TACACS_PLUS; hashconfig->dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 hashconfig->parse_func = tacacs_plus_parse_hash; - hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE; + hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_INIT + | OPTI_TYPE_NOT_ITERATED + | OPTI_TYPE_RAW_HASH; hashconfig->dgst_pos0 = 0; hashconfig->dgst_pos1 = 1; hashconfig->dgst_pos2 = 2;