mirror of https://github.com/hashcat/hashcat.git
commit
75d3d5503d
@ -0,0 +1,592 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_md5.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
// file names of the comparison code; the comp kernel includes COMPARE_M

#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

// protocol constants

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       64

// sizes of the tmps arrays, counted in u32 elements

#define SNMPV3_TMP_ELEMS           4096 // (256 * 64) / sizeof (u32): 64 copies of a password of up to 256 bytes
#define SNMPV3_HASH_ELEMS_MD5      4
#define SNMPV3_HASH_ELEMS_SHA1     8    // 5 state words, rounded up to 8

// sizes of the esalt arrays, counted in u32 elements

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 bytes, enough for SNMPV3_SALT_MAX and a multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS    16   // 16 * 4 = 64 bytes, enough for SNMPV3_ENGINEID_MAX and a multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 bytes > 9

// passwords up to SNMPV3_MAX_PW_LENGTH_OPT bytes are hashed from a kernel-local
// copy of the cached blocks, longer ones are read from global memory per block

#define SNMPV3_MAX_PW_LENGTH_OPT   64
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4) // (64 * 64) / 4 = 1024
|
||||
|
||||
/**
 * Per-candidate state handed from the init kernel to the loop and comp
 * kernels of this file (it carries both the MD5 and the SHA1 side).
 */
struct hmac_md5_tmp
{
  // 64-byte blocks of the repeated password, filled by the init kernel
  u32 tmp_md5[SNMPV3_TMP_ELEMS];   // words exactly as staged
  u32 tmp_sha1[SNMPV3_TMP_ELEMS];  // the same words after hc_swap32_S

  // running hash states, rewritten by every loop kernel invocation
  u32 h_md5[SNMPV3_HASH_ELEMS_MD5];
  u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1]; // the kernels in this file touch indices 0..4 only
};

typedef struct hmac_md5_tmp hmac_md5_tmp_t;
|
||||
|
||||
/**
 * Per-hash input data read by the comp kernel through esalt_bufs.
 */
struct snmpv3
{
  // message that the comp kernel feeds into the final HMAC, and its byte length
  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
  u32 salt_len;

  // engine ID that the comp kernel hashes between two copies of the digest
  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
  u32 engineID_len;

  // not referenced by the kernels in this file
  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
};

typedef struct snmpv3 snmpv3_t;
|
||||
|
||||
/**
 * Init kernel for mode 25000.
 *
 * Lays the candidate password out 64 times back to back, cuts that byte
 * stream into 64-byte blocks and stores every block twice in tmps[gid]:
 * unchanged in tmp_md5 and with each word passed through hc_swap32_S in
 * tmp_sha1. Finally both running hash states are set to their initial values.
 */
KERNEL_FQ void m25000_init (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // local, zero-padded copy of the candidate

  u32 pw_buf[64] = { 0 };

  for (u32 pos = 0, word = 0; pos < pw_len; pos += 4, word++)
  {
    pw_buf[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_buf;

  // staging area for one 64-byte block

  u32 block[16];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes of the repeated stream produced so far

  for (int rep = 0; rep < 64; rep++)
  {
    for (int j = 0; j < pw_len; j++)
    {
      const int slot = written & 63;

      block_bytes[slot] = pw_bytes[j];

      written++;

      if (slot != 63) continue;

      // the staging block is full: flush it to global memory

      const int base = (written - 64) / 4;

      for (int k = 0; k < 16; k++)
      {
        tmps[gid].tmp_md5[base + k]  = block[k];
        tmps[gid].tmp_sha1[base + k] = hc_swap32_S (block[k]);
      }
    }
  }

  // initial md5 state

  tmps[gid].h_md5[0] = MD5M_A;
  tmps[gid].h_md5[1] = MD5M_B;
  tmps[gid].h_md5[2] = MD5M_C;
  tmps[gid].h_md5[3] = MD5M_D;

  // initial sha1 state

  tmps[gid].h_sha1[0] = SHA1M_A;
  tmps[gid].h_sha1[1] = SHA1M_B;
  tmps[gid].h_sha1[2] = SHA1M_C;
  tmps[gid].h_sha1[3] = SHA1M_D;
  tmps[gid].h_sha1[4] = SHA1M_E;
}
|
||||
|
||||
/**
 * Loop kernel for mode 25000.
 *
 * Advances the running MD5 and SHA1 states over the block cache that
 * m25000_init stored in tmps[gid]. One invocation handles loop_cnt bytes
 * starting at stream offset loop_pos, one 64-byte block per step, and writes
 * both states back to tmps[gid].
 *
 * Candidates with pw_len == 0 are skipped: nothing was cached for them and
 * the block lookup would otherwise compute a modulo by zero.
 */
KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // empty candidate: no cached blocks exist and (j % pw_len64) below would
  // divide by zero, so leave the stored states untouched

  if (pw_len == 0) return;

  // byte length of the cached stream (the password repeated 64 times)

  const int pw_len64 = pw_len * 64;

  u32 h_md5[4];

  for (int k = 0; k < 4; k++) h_md5[k] = tmps[gid].h_md5[k];

  u32 h_sha1[5];

  for (int k = 0; k < 5; k++) h_sha1[k] = tmps[gid].h_sha1[k];

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short candidates: copy the cached blocks into a kernel-local array once
    // and serve every block of this invocation from that copy

    u32 tmp_shared[SNMPV3_TMP_ELEMS_OPT];

    // md5

    for (int i = 0; i < pw_len64 / 4; i++)
    {
      tmp_shared[i] = tmps[gid].tmp_md5[i];
    }

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      // word offset of the current block; the stream repeats every pw_len64 bytes

      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmp_shared[idx +  0 + k];
        w1[k] = tmp_shared[idx +  4 + k];
        w2[k] = tmp_shared[idx +  8 + k];
        w3[k] = tmp_shared[idx + 12 + k];
      }

      md5_transform (w0, w1, w2, w3, h_md5);
    }

    // sha1

    for (int i = 0; i < pw_len64 / 4; i++)
    {
      tmp_shared[i] = tmps[gid].tmp_sha1[i];
    }

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmp_shared[idx +  0 + k];
        w1[k] = tmp_shared[idx +  4 + k];
        w2[k] = tmp_shared[idx +  8 + k];
        w3[k] = tmp_shared[idx + 12 + k];
      }

      sha1_transform (w0, w1, w2, w3, h_sha1);
    }
  }
  else
  {
    // long candidates: read every block straight from tmps[gid]

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      // md5

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp_md5[idx +  0 + k];
        w1[k] = tmps[gid].tmp_md5[idx +  4 + k];
        w2[k] = tmps[gid].tmp_md5[idx +  8 + k];
        w3[k] = tmps[gid].tmp_md5[idx + 12 + k];
      }

      md5_transform (w0, w1, w2, w3, h_md5);

      // sha1

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp_sha1[idx +  0 + k];
        w1[k] = tmps[gid].tmp_sha1[idx +  4 + k];
        w2[k] = tmps[gid].tmp_sha1[idx +  8 + k];
        w3[k] = tmps[gid].tmp_sha1[idx + 12 + k];
      }

      sha1_transform (w0, w1, w2, w3, h_sha1);
    }
  }

  // hand both states to the next invocation

  for (int k = 0; k < 4; k++) tmps[gid].h_md5[k] = h_md5[k];

  for (int k = 0; k < 5; k++) tmps[gid].h_sha1[k] = h_sha1[k];
}
|
||||
|
||||
/**
 * Comp kernel for mode 25000.
 *
 * For MD5 and for SHA1 separately:
 *   1. run one last transform over a block holding the 0x80 marker, zeros and
 *      the bit count SNMPV3_ROUNDS * 8, which completes the hash of the stream
 *      processed by the loop kernel;
 *   2. hash digest || engineID || digest;
 *   3. use that result as HMAC key over esalt salt_buf;
 *   4. compare the first three words of the HMAC result via COMPARE_M
 *      (the MD5 words are passed through hc_swap32_S first).
 */
KERNEL_FQ void m25000_comp (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { 0 };
  u32 w1[4] = { 0 };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0 };

  // md5: marker in the low byte of word 0, bit count in word 14

  w0[0] = 0x00000080;
  w3[2] = SNMPV3_ROUNDS * 8;

  u32 h_md5[4];

  for (int k = 0; k < 4; k++) h_md5[k] = tmps[gid].h_md5[k];

  md5_transform (w0, w1, w2, w3, h_md5);

  // sha1: marker in the high byte of word 0, bit count in word 15

  w0[0] = 0x80000000;
  w3[2] = 0;
  w3[3] = SNMPV3_ROUNDS * 8;

  u32 h_sha1[5];

  for (int k = 0; k < 5; k++) h_sha1[k] = tmps[gid].h_sha1[k];

  sha1_transform (w0, w1, w2, w3, h_sha1);

  md5_ctx_t  md5_ctx;
  sha1_ctx_t sha1_ctx;

  md5_init  (&md5_ctx);
  sha1_init (&sha1_ctx);

  u32 w[16];

  // leading copy of each digest

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? h_md5[k] : 0;

  md5_update (&md5_ctx, w, 16);

  for (int k = 0; k < 16; k++) w[k] = (k < 5) ? h_sha1[k] : 0;

  sha1_update (&sha1_ctx, w, 20);

  // engineID

  md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  sha1_update_global_swap (&sha1_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // trailing copy of each digest

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? h_md5[k] : 0;

  md5_update (&md5_ctx, w, 16);

  for (int k = 0; k < 16; k++) w[k] = (k < 5) ? h_sha1[k] : 0;

  sha1_update (&sha1_ctx, w, 20);

  md5_final  (&md5_ctx);
  sha1_final (&sha1_ctx);

  // md5: HMAC over the salt, keyed with the 16-byte result from above

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? md5_ctx.h[k] : 0;

  md5_hmac_ctx_t md5_hmac_ctx;

  md5_hmac_init (&md5_hmac_ctx, w, 16);

  md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  md5_hmac_final (&md5_hmac_ctx);

  {
    const u32 r0 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R0]);
    const u32 r1 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R1]);
    const u32 r2 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R2]);
    const u32 r3 = 0;

    #define il_pos 0

    #ifdef KERNEL_STATIC
    #include COMPARE_M
    #endif
  }

  // sha1: HMAC over the salt, keyed with the 20-byte result from above

  for (int k = 0; k < 16; k++) w[k] = (k < 5) ? sha1_ctx.h[k] : 0;

  sha1_hmac_ctx_t sha1_hmac_ctx;

  sha1_hmac_init (&sha1_hmac_ctx, w, 20);

  sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha1_hmac_final (&sha1_hmac_ctx);

  {
    const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0];
    const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1];
    const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2];
    const u32 r3 = 0;

    #define il_pos 0

    #ifdef KERNEL_STATIC
    #include COMPARE_M
    #endif
  }
}
|
@ -0,0 +1,355 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
// file names of the comparison code; the comp kernel includes COMPARE_M

#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

// protocol constants

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       64

// sizes of the tmps arrays, counted in u32 elements

#define SNMPV3_TMP_ELEMS           4096 // (256 * 64) / sizeof (u32): 64 copies of a password of up to 256 bytes
#define SNMPV3_HASH_ELEMS          4

// sizes of the esalt arrays, counted in u32 elements

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 bytes, enough for SNMPV3_SALT_MAX and a multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS    16   // 16 * 4 = 64 bytes, enough for SNMPV3_ENGINEID_MAX and a multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 bytes > 9

// passwords up to SNMPV3_MAX_PW_LENGTH_OPT bytes are hashed from a kernel-local
// copy of the cached blocks, longer ones are read from global memory per block

#define SNMPV3_MAX_PW_LENGTH_OPT   64
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4) // (64 * 64) / 4 = 1024
|
||||
|
||||
/**
 * Per-candidate state handed from the init kernel to the loop and comp kernels.
 */
struct hmac_md5_tmp
{
  u32 tmp[SNMPV3_TMP_ELEMS]; // 64-byte blocks of the repeated password, filled by the init kernel
  u32 h[SNMPV3_HASH_ELEMS];  // running MD5 state, rewritten by every loop kernel invocation
};

typedef struct hmac_md5_tmp hmac_md5_tmp_t;
|
||||
|
||||
/**
 * Per-hash input data read by the comp kernel through esalt_bufs.
 */
struct snmpv3
{
  // message that the comp kernel feeds into the final HMAC, and its byte length
  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
  u32 salt_len;

  // engine ID that the comp kernel hashes between two copies of the digest
  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
  u32 engineID_len;

  // not referenced by the kernels in this file
  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
};

typedef struct snmpv3 snmpv3_t;
|
||||
|
||||
/**
 * Init kernel for mode 25100.
 *
 * Lays the candidate password out 64 times back to back, cuts that byte
 * stream into 64-byte blocks and stores them unchanged in tmps[gid].tmp.
 * Finally the running MD5 state is set to its initial value.
 */
KERNEL_FQ void m25100_init (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // local, zero-padded copy of the candidate

  u32 pw_buf[64] = { 0 };

  for (u32 pos = 0, word = 0; pos < pw_len; pos += 4, word++)
  {
    pw_buf[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_buf;

  // staging area for one 64-byte block

  u32 block[16];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes of the repeated stream produced so far

  for (int rep = 0; rep < 64; rep++)
  {
    for (int j = 0; j < pw_len; j++)
    {
      const int slot = written & 63;

      block_bytes[slot] = pw_bytes[j];

      written++;

      if (slot != 63) continue;

      // the staging block is full: flush it to global memory

      const int base = (written - 64) / 4;

      for (int k = 0; k < 16; k++)
      {
        tmps[gid].tmp[base + k] = block[k];
      }
    }
  }

  // initial md5 state

  tmps[gid].h[0] = MD5M_A;
  tmps[gid].h[1] = MD5M_B;
  tmps[gid].h[2] = MD5M_C;
  tmps[gid].h[3] = MD5M_D;
}
|
||||
|
||||
/**
 * Loop kernel for mode 25100.
 *
 * Advances the running MD5 state over the block cache that m25100_init
 * stored in tmps[gid]. One invocation handles loop_cnt bytes starting at
 * stream offset loop_pos, one 64-byte block per step, and writes the state
 * back to tmps[gid].
 *
 * Candidates with pw_len == 0 are skipped: nothing was cached for them and
 * the block lookup would otherwise compute a modulo by zero.
 */
KERNEL_FQ void m25100_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // empty candidate: no cached blocks exist and (j % pw_len64) below would
  // divide by zero, so leave the stored state untouched

  if (pw_len == 0) return;

  // byte length of the cached stream (the password repeated 64 times)

  const int pw_len64 = pw_len * 64;

  u32 h[4];

  for (int k = 0; k < 4; k++) h[k] = tmps[gid].h[k];

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short candidates: copy the cached blocks into a kernel-local array once
    // and serve every block of this invocation from that copy

    u32 tmp[SNMPV3_TMP_ELEMS_OPT];

    for (int i = 0; i < pw_len64 / 4; i++)
    {
      tmp[i] = tmps[gid].tmp[i];
    }

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      // word offset of the current block; the stream repeats every pw_len64 bytes

      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmp[idx +  0 + k];
        w1[k] = tmp[idx +  4 + k];
        w2[k] = tmp[idx +  8 + k];
        w3[k] = tmp[idx + 12 + k];
      }

      md5_transform (w0, w1, w2, w3, h);
    }
  }
  else
  {
    // long candidates: read every block straight from tmps[gid]

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
      }

      md5_transform (w0, w1, w2, w3, h);
    }
  }

  // hand the state to the next invocation

  for (int k = 0; k < 4; k++) tmps[gid].h[k] = h[k];
}
|
||||
|
||||
/**
 * Comp kernel for mode 25100.
 *
 *   1. run one last MD5 transform over a block holding the 0x80 marker, zeros
 *      and the bit count SNMPV3_ROUNDS * 8, which completes the hash of the
 *      stream processed by the loop kernel;
 *   2. hash digest || engineID || digest;
 *   3. use that result as HMAC-MD5 key over esalt salt_buf;
 *   4. compare the first three words of the HMAC result via COMPARE_M.
 */
KERNEL_FQ void m25100_comp (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // marker in the low byte of word 0, bit count in word 14

  u32 w0[4] = { 0x00000080, 0, 0, 0 };
  u32 w1[4] = { 0 };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0, 0, SNMPV3_ROUNDS * 8, 0 };

  u32 h[4];

  for (int k = 0; k < 4; k++) h[k] = tmps[gid].h[k];

  md5_transform (w0, w1, w2, w3, h);

  md5_ctx_t ctx;

  md5_init (&ctx);

  u32 w[16];

  // leading copy of the digest

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? h[k] : 0;

  md5_update (&ctx, w, 16);

  // engineID

  md5_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // trailing copy of the digest

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? h[k] : 0;

  md5_update (&ctx, w, 16);

  md5_final (&ctx);

  // HMAC over the salt, keyed with the 16-byte result from above

  for (int k = 0; k < 16; k++) w[k] = (k < 4) ? ctx.h[k] : 0;

  md5_hmac_ctx_t hmac_ctx;

  md5_hmac_init (&hmac_ctx, w, 16);

  md5_hmac_update_global (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  md5_hmac_final (&hmac_ctx);

  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
  const u32 r3 = 0;

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,360 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
// file names of the comparison code

#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

// protocol constants

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       64

// sizes of the tmps arrays, counted in u32 elements

#define SNMPV3_TMP_ELEMS           4096 // (256 * 64) / sizeof (u32): 64 copies of a password of up to 256 bytes
#define SNMPV3_HASH_ELEMS          8    // 5 state words, rounded up to 8

// sizes of the esalt arrays, counted in u32 elements

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 bytes, enough for SNMPV3_SALT_MAX and a multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS    16   // 16 * 4 = 64 bytes, enough for SNMPV3_ENGINEID_MAX and a multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 bytes > 9

// passwords up to SNMPV3_MAX_PW_LENGTH_OPT bytes are hashed from a kernel-local
// copy of the cached blocks, longer ones are read from global memory per block

#define SNMPV3_MAX_PW_LENGTH_OPT   64
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4) // (64 * 64) / 4 = 1024
|
||||
|
||||
/**
 * Per-candidate state handed from the init kernel to the later kernels.
 */
struct hmac_sha1_tmp
{
  u32 tmp[SNMPV3_TMP_ELEMS]; // 64-byte blocks of the repeated password, each word stored after hc_swap32_S
  u32 h[SNMPV3_HASH_ELEMS];  // running SHA1 state; the init and loop kernels touch indices 0..4 only
};

typedef struct hmac_sha1_tmp hmac_sha1_tmp_t;
|
||||
|
||||
/**
 * Per-hash input data made available to the kernels through esalt_bufs.
 */
struct snmpv3
{
  // NOTE(review): presumably the message for the final HMAC, as in the sibling
  // SNMPv3 kernels - confirm against the comp kernel
  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
  u32 salt_len;

  // engine ID that the comp kernel hashes between two copies of the digest
  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
  u32 engineID_len;

  // not referenced by the init and loop kernels
  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
};

typedef struct snmpv3 snmpv3_t;
|
||||
|
||||
/**
 * Init kernel for mode 25200.
 *
 * Lays the candidate password out 64 times back to back, cuts that byte
 * stream into 64-byte blocks and stores them in tmps[gid].tmp with every
 * word passed through hc_swap32_S. Finally the running SHA1 state is set to
 * its initial value.
 */
KERNEL_FQ void m25200_init (KERN_ATTR_TMPS_ESALT (hmac_sha1_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // local, zero-padded copy of the candidate

  u32 pw_buf[64] = { 0 };

  for (u32 pos = 0, word = 0; pos < pw_len; pos += 4, word++)
  {
    pw_buf[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_buf;

  // staging area for one 64-byte block

  u32 block[16];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes of the repeated stream produced so far

  for (int rep = 0; rep < 64; rep++)
  {
    for (int j = 0; j < pw_len; j++)
    {
      const int slot = written & 63;

      block_bytes[slot] = pw_bytes[j];

      written++;

      if (slot != 63) continue;

      // the staging block is full: flush it, byte-swapped, to global memory

      const int base = (written - 64) / 4;

      for (int k = 0; k < 16; k++)
      {
        tmps[gid].tmp[base + k] = hc_swap32_S (block[k]);
      }
    }
  }

  // initial sha1 state

  tmps[gid].h[0] = SHA1M_A;
  tmps[gid].h[1] = SHA1M_B;
  tmps[gid].h[2] = SHA1M_C;
  tmps[gid].h[3] = SHA1M_D;
  tmps[gid].h[4] = SHA1M_E;
}
|
||||
|
||||
/**
 * Loop kernel for mode 25200.
 *
 * Advances the running SHA1 state over the block cache that m25200_init
 * stored in tmps[gid]. One invocation handles loop_cnt bytes starting at
 * stream offset loop_pos, one 64-byte block per step, and writes the state
 * back to tmps[gid].
 *
 * Candidates with pw_len == 0 are skipped: nothing was cached for them and
 * the block lookup would otherwise compute a modulo by zero.
 */
KERNEL_FQ void m25200_loop (KERN_ATTR_TMPS_ESALT (hmac_sha1_tmp_t, snmpv3_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // empty candidate: no cached blocks exist and (j % pw_len64) below would
  // divide by zero, so leave the stored state untouched

  if (pw_len == 0) return;

  // byte length of the cached stream (the password repeated 64 times)

  const int pw_len64 = pw_len * 64;

  u32 h[5];

  for (int k = 0; k < 5; k++) h[k] = tmps[gid].h[k];

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short candidates: copy the cached blocks into a kernel-local array once
    // and serve every block of this invocation from that copy

    u32 tmp[SNMPV3_TMP_ELEMS_OPT];

    for (int i = 0; i < pw_len64 / 4; i++)
    {
      tmp[i] = tmps[gid].tmp[i];
    }

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      // word offset of the current block; the stream repeats every pw_len64 bytes

      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmp[idx +  0 + k];
        w1[k] = tmp[idx +  4 + k];
        w2[k] = tmp[idx +  8 + k];
        w3[k] = tmp[idx + 12 + k];
      }

      sha1_transform (w0, w1, w2, w3, h);
    }
  }
  else
  {
    // long candidates: read every block straight from tmps[gid]

    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
    {
      const int idx = (j % pw_len64) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
      }

      sha1_transform (w0, w1, w2, w3, h);
    }
  }

  // hand the state to the next invocation

  for (int k = 0; k < 5; k++) tmps[gid].h[k] = h[k];
}
|
||||
|
||||
// Finishes the SHA-1 computation started by the _loop kernel, derives the
// engine-specific key and compares the truncated HMAC-SHA1 against the
// target digest(s).
KERNEL_FQ void m25200_comp (KERN_ATTR_TMPS_ESALT (hmac_sha1_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Last block fed into the running state: a leading 0x80 marker, zero fill
  // and 1048576 * 8 in the last word (looks like the SHA-1 padding block for
  // a 1048576 byte message).

  u32 w0[4] = { 0x80000000, 0, 0, 0 };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 1048576 * 8 };

  u32 h[5];

  for (int k = 0; k < 5; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  sha1_transform (w0, w1, w2, w3, h);

  // second hash: h | engineID | h  (h is 20 bytes)

  sha1_ctx_t ctx;

  sha1_init (&ctx);

  u32 w[16];

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 5) ? h[k] : 0;
  }

  sha1_update (&ctx, w, 20);

  sha1_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // refill the buffer before handing it to sha1_update a second time

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 5) ? h[k] : 0;
  }

  sha1_update (&ctx, w, 20);

  sha1_final (&ctx);

  // the 20 byte result becomes the HMAC key, the salt buffer is the message

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 5) ? ctx.h[k] : 0;
  }

  sha1_hmac_ctx_t hmac_ctx;

  sha1_hmac_init (&hmac_ctx, w, 20);

  sha1_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha1_hmac_final (&hmac_ctx);

  // only three words of the MAC take part in the comparison, r3 is fixed to 0

  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
  const u32 r3 = 0;

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
||||
|
@ -0,0 +1,371 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha224.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 16
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       64   // multiplier of the sizing formulas below (the kernels repeat the password 64 times)

#define SNMPV3_TMP_ELEMS           4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS          8

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 > 1500 (SNMPV3_SALT_MAX), also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS    16   // 16 * 4 = 64 > 34 (SNMPV3_ENGINEID_MAX), also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 > 9

#define SNMPV3_MAX_PW_LENGTH_OPT   64
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4)
// (64 * 64) / 4 = 1024
// for pw length > 64 we use global memory reads
|
||||
|
||||
typedef struct hmac_sha224_tmp
|
||||
{
|
||||
u32 tmp[SNMPV3_TMP_ELEMS];
|
||||
u32 h[SNMPV3_HASH_ELEMS];
|
||||
|
||||
} hmac_sha224_tmp_t;
|
||||
|
||||
typedef struct snmpv3
|
||||
{
|
||||
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
|
||||
u32 salt_len;
|
||||
|
||||
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
|
||||
u32 engineID_len;
|
||||
|
||||
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
|
||||
|
||||
} snmpv3_t;
|
||||
|
||||
// Prepares the per-candidate state: writes the password 64 times back to back
// into tmps[gid].tmp (as byte-swapped words) and seeds tmps[gid].h with the
// SHA-224 initial values.
KERNEL_FQ void m26700_init (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // private copy of the candidate, words past the password stay zero

  u32 pw_words[64] = { 0 };

  for (u32 word = 0; (word * 4) < pw_len; word++)
  {
    pw_words[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_words;

  // 64 byte staging block, flushed to tmps[gid].tmp whenever it is full

  u32 block[16];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes emitted so far

  for (int rep = 0; rep < 64; rep++)
  {
    for (u32 pos = 0; pos < pw_len; pos++)
    {
      const int slot = written & 63;

      block_bytes[slot] = pw_bytes[pos];

      written++;

      if (slot != 63) continue;

      // block complete: store it, swapped, at the word offset where it started

      const int base = (written - 64) / 4;

      for (int k = 0; k < 16; k++)
      {
        tmps[gid].tmp[base + k] = hc_swap32_S (block[k]);
      }
    }
  }

  // initial hash state

  const u32 iv[8] =
  {
    SHA224M_A, SHA224M_B, SHA224M_C, SHA224M_D,
    SHA224M_E, SHA224M_F, SHA224M_G, SHA224M_H
  };

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = iv[k];
  }
}
|
||||
|
||||
// Advances the running SHA-224 state by loop_cnt bytes of the repeated
// password pattern, starting at byte offset loop_pos.
KERNEL_FQ void m26700_loop (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  const u32 pw_len = pws[gid].pw_len;

  // byte length of the pattern written by m26700_init (password * 64)
  // NOTE(review): pw_len == 0 would make the modulo below divide by zero;
  // presumably the host enforces a minimum password length -- confirm.

  const int pattern_len = pw_len * 64;

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short passwords: work from a private copy of the pattern

    u32 cache[SNMPV3_TMP_ELEMS_OPT];

    const int pattern_words = pattern_len / 4;

    for (int k = 0; k < pattern_words; k++)
    {
      cache[k] = tmps[gid].tmp[k];
    }

    for (int done = 0, off = loop_pos; done < loop_cnt; done += 64, off += 64)
    {
      // the pattern length is a multiple of 64, so (assuming loop_pos is one
      // as well) every 64 byte block is a contiguous slice of the pattern

      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = cache[idx +  0 + k];
        w1[k] = cache[idx +  4 + k];
        w2[k] = cache[idx +  8 + k];
        w3[k] = cache[idx + 12 + k];
      }

      sha224_transform (w0, w1, w2, w3, h);
    }
  }
  else
  {
    // long passwords: read the pattern straight from tmps[gid].tmp

    for (int done = 0, off = loop_pos; done < loop_cnt; done += 64, off += 64)
    {
      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
      }

      sha224_transform (w0, w1, w2, w3, h);
    }
  }

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = h[k];
  }
}
|
||||
|
||||
// Finishes the SHA-224 computation started by m26700_loop, derives the
// engine-specific key and compares the first four words of the HMAC-SHA224
// against the target digest(s).
KERNEL_FQ void m26700_comp (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Last block fed into the running state: a leading 0x80 marker, zero fill
  // and the bit count of SNMPV3_ROUNDS bytes in the last word.

  u32 w0[4] = { 0x80000000, 0, 0, 0 };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, SNMPV3_ROUNDS * 8 };

  u32 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  sha224_transform (w0, w1, w2, w3, h);

  // second hash: h | engineID | h  (only the first 28 bytes of h are used)

  sha224_ctx_t ctx;

  sha224_init (&ctx);

  u32 w[16];

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 7) ? h[k] : 0;
  }

  sha224_update (&ctx, w, 28);

  sha224_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // refill the buffer before handing it to sha224_update a second time

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 7) ? h[k] : 0;
  }

  sha224_update (&ctx, w, 28);

  sha224_final (&ctx);

  // the 28 byte result becomes the HMAC key, the salt buffer is the message

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 7) ? ctx.h[k] : 0;
  }

  sha224_hmac_ctx_t hmac_ctx;

  sha224_hmac_init (&hmac_ctx, w, 28);

  sha224_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha224_hmac_final (&hmac_ctx);

  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
  const u32 r3 = hmac_ctx.opad.h[DGST_R3];

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,371 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha256.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 24
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       64   // multiplier of the sizing formulas below (the kernels repeat the password 64 times)

#define SNMPV3_TMP_ELEMS           4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS          8

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 > 1500 (SNMPV3_SALT_MAX), also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS    16   // 16 * 4 = 64 > 34 (SNMPV3_ENGINEID_MAX), also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 > 9

#define SNMPV3_MAX_PW_LENGTH_OPT   64
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4)
// (64 * 64) / 4 = 1024
// for pw length > 64 we use global memory reads
|
||||
|
||||
typedef struct hmac_sha256_tmp
|
||||
{
|
||||
u32 tmp[SNMPV3_TMP_ELEMS];
|
||||
u32 h[SNMPV3_HASH_ELEMS];
|
||||
|
||||
} hmac_sha256_tmp_t;
|
||||
|
||||
typedef struct snmpv3
|
||||
{
|
||||
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
|
||||
u32 salt_len;
|
||||
|
||||
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
|
||||
u32 engineID_len;
|
||||
|
||||
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
|
||||
|
||||
} snmpv3_t;
|
||||
|
||||
// Prepares the per-candidate state: writes the password 64 times back to back
// into tmps[gid].tmp (as byte-swapped words) and seeds tmps[gid].h with the
// SHA-256 initial values.
KERNEL_FQ void m26800_init (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // private copy of the candidate, words past the password stay zero

  u32 pw_words[64] = { 0 };

  for (u32 word = 0; (word * 4) < pw_len; word++)
  {
    pw_words[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_words;

  // 64 byte staging block, flushed to tmps[gid].tmp whenever it is full

  u32 block[16];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes emitted so far

  for (int rep = 0; rep < 64; rep++)
  {
    for (u32 pos = 0; pos < pw_len; pos++)
    {
      const int slot = written & 63;

      block_bytes[slot] = pw_bytes[pos];

      written++;

      if (slot != 63) continue;

      // block complete: store it, swapped, at the word offset where it started

      const int base = (written - 64) / 4;

      for (int k = 0; k < 16; k++)
      {
        tmps[gid].tmp[base + k] = hc_swap32_S (block[k]);
      }
    }
  }

  // initial hash state

  const u32 iv[8] =
  {
    SHA256M_A, SHA256M_B, SHA256M_C, SHA256M_D,
    SHA256M_E, SHA256M_F, SHA256M_G, SHA256M_H
  };

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = iv[k];
  }
}
|
||||
|
||||
// Advances the running SHA-256 state by loop_cnt bytes of the repeated
// password pattern, starting at byte offset loop_pos.
KERNEL_FQ void m26800_loop (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  const u32 pw_len = pws[gid].pw_len;

  // byte length of the pattern written by m26800_init (password * 64)
  // NOTE(review): pw_len == 0 would make the modulo below divide by zero;
  // presumably the host enforces a minimum password length -- confirm.

  const int pattern_len = pw_len * 64;

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short passwords: work from a private copy of the pattern

    u32 cache[SNMPV3_TMP_ELEMS_OPT];

    const int pattern_words = pattern_len / 4;

    for (int k = 0; k < pattern_words; k++)
    {
      cache[k] = tmps[gid].tmp[k];
    }

    for (int done = 0, off = loop_pos; done < loop_cnt; done += 64, off += 64)
    {
      // the pattern length is a multiple of 64, so (assuming loop_pos is one
      // as well) every 64 byte block is a contiguous slice of the pattern

      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = cache[idx +  0 + k];
        w1[k] = cache[idx +  4 + k];
        w2[k] = cache[idx +  8 + k];
        w3[k] = cache[idx + 12 + k];
      }

      sha256_transform (w0, w1, w2, w3, h);
    }
  }
  else
  {
    // long passwords: read the pattern straight from tmps[gid].tmp

    for (int done = 0, off = loop_pos; done < loop_cnt; done += 64, off += 64)
    {
      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
      }

      sha256_transform (w0, w1, w2, w3, h);
    }
  }

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = h[k];
  }
}
|
||||
|
||||
// Finishes the SHA-256 computation started by m26800_loop, derives the
// engine-specific key and compares the first four words of the HMAC-SHA256
// against the target digest(s).
KERNEL_FQ void m26800_comp (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Last block fed into the running state: a leading 0x80 marker, zero fill
  // and the bit count of SNMPV3_ROUNDS bytes in the last word.

  u32 w0[4] = { 0x80000000, 0, 0, 0 };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, SNMPV3_ROUNDS * 8 };

  u32 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  sha256_transform (w0, w1, w2, w3, h);

  // second hash: h | engineID | h  (h is 32 bytes)

  sha256_ctx_t ctx;

  sha256_init (&ctx);

  u32 w[16];

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 8) ? h[k] : 0;
  }

  sha256_update (&ctx, w, 32);

  sha256_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // refill the buffer before handing it to sha256_update a second time

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 8) ? h[k] : 0;
  }

  sha256_update (&ctx, w, 32);

  sha256_final (&ctx);

  // the 32 byte result becomes the HMAC key, the salt buffer is the message

  for (int k = 0; k < 16; k++)
  {
    w[k] = (k < 8) ? ctx.h[k] : 0;
  }

  sha256_hmac_ctx_t hmac_ctx;

  sha256_hmac_init (&hmac_ctx, w, 32);

  sha256_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha256_hmac_final (&hmac_ctx);

  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
  const u32 r3 = hmac_ctx.opad.h[DGST_R3];

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,495 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha384.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

#define SNMPV3_SALT_MAX            1500
#define SNMPV3_ENGINEID_MAX        34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 32
#define SNMPV3_ROUNDS              1048576
#define SNMPV3_MAX_PW_LENGTH       128  // multiplier of the sizing formulas below (the kernels repeat the password 128 times)

#define SNMPV3_TMP_ELEMS           8192 // 8192 = (256 (max pw length) * SNMPV3_MAX_PW_LENGTH) / sizeof (u32)
#define SNMPV3_HASH_ELEMS          8

#define SNMPV3_MAX_SALT_ELEMS      512  // 512 * 4 = 2048 > 1500 (SNMPV3_SALT_MAX), also has to be multiple of SNMPV3_MAX_PW_LENGTH
#define SNMPV3_MAX_ENGINE_ELEMS    32   // 32 * 4 = 128 > 34 (SNMPV3_ENGINEID_MAX), also has to be multiple of SNMPV3_MAX_PW_LENGTH
#define SNMPV3_MAX_PNUM_ELEMS      4    // 4 * 4 = 16 > 9

#define SNMPV3_MAX_PW_LENGTH_OPT   32
#define SNMPV3_TMP_ELEMS_OPT       ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4)
// (32 * 128) / 4 = 1024
// for pw length > 32 we use global memory reads
|
||||
|
||||
typedef struct hmac_sha384_tmp
|
||||
{
|
||||
u32 tmp[SNMPV3_TMP_ELEMS];
|
||||
u64 h[SNMPV3_HASH_ELEMS];
|
||||
|
||||
} hmac_sha384_tmp_t;
|
||||
|
||||
typedef struct snmpv3
|
||||
{
|
||||
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
|
||||
u32 salt_len;
|
||||
|
||||
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
|
||||
u32 engineID_len;
|
||||
|
||||
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
|
||||
|
||||
} snmpv3_t;
|
||||
|
||||
// Prepares the per-candidate state: writes the password 128 times back to
// back into tmps[gid].tmp (as byte-swapped 32 bit words) and seeds
// tmps[gid].h with the SHA-384 initial values.
KERNEL_FQ void m26900_init (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // private copy of the candidate, words past the password stay zero

  u32 pw_words[128] = { 0 };

  for (u32 word = 0; (word * 4) < pw_len; word++)
  {
    pw_words[word] = pws[gid].i[word];
  }

  u8 *pw_bytes = (u8 *) pw_words;

  // 128 byte staging block, flushed to tmps[gid].tmp whenever it is full

  u32 block[32];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // bytes emitted so far

  for (int rep = 0; rep < 128; rep++)
  {
    for (u32 pos = 0; pos < pw_len; pos++)
    {
      const int slot = written & 127;

      block_bytes[slot] = pw_bytes[pos];

      written++;

      if (slot != 127) continue;

      // block complete: store it, swapped, at the word offset where it started

      const int base = (written - 128) / 4;

      for (int k = 0; k < 32; k++)
      {
        tmps[gid].tmp[base + k] = hc_swap32_S (block[k]);
      }
    }
  }

  // initial hash state

  const u64 iv[8] =
  {
    SHA384M_A, SHA384M_B, SHA384M_C, SHA384M_D,
    SHA384M_E, SHA384M_F, SHA384M_G, SHA384M_H
  };

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = iv[k];
  }
}
|
||||
|
||||
// Advances the running SHA-384 state by loop_cnt bytes of the repeated
// password pattern, starting at byte offset loop_pos.
KERNEL_FQ void m26900_loop (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u64 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  const u32 pw_len = pws[gid].pw_len;

  // byte length of the pattern written by m26900_init (password * 128)
  // NOTE(review): pw_len == 0 would make the modulo below divide by zero;
  // presumably the host enforces a minimum password length -- confirm.

  const int pattern_len = pw_len * 128;

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short passwords: work from a private copy of the pattern

    u32 cache[SNMPV3_TMP_ELEMS_OPT];

    const int pattern_words = pattern_len / 4;

    for (int k = 0; k < pattern_words; k++)
    {
      cache[k] = tmps[gid].tmp[k];
    }

    for (u32 done = 0, off = loop_pos; done < loop_cnt; done += 128, off += 128)
    {
      // the pattern length is a multiple of 128, so (assuming loop_pos is one
      // as well) every 128 byte block is a contiguous slice of the pattern

      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];
      u32 w4[4];
      u32 w5[4];
      u32 w6[4];
      u32 w7[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = cache[idx +  0 + k];
        w1[k] = cache[idx +  4 + k];
        w2[k] = cache[idx +  8 + k];
        w3[k] = cache[idx + 12 + k];
        w4[k] = cache[idx + 16 + k];
        w5[k] = cache[idx + 20 + k];
        w6[k] = cache[idx + 24 + k];
        w7[k] = cache[idx + 28 + k];
      }

      sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
    }
  }
  else
  {
    // long passwords: read the pattern straight from tmps[gid].tmp

    for (u32 done = 0, off = loop_pos; done < loop_cnt; done += 128, off += 128)
    {
      const int idx = (off % pattern_len) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];
      u32 w4[4];
      u32 w5[4];
      u32 w6[4];
      u32 w7[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
        w4[k] = tmps[gid].tmp[idx + 16 + k];
        w5[k] = tmps[gid].tmp[idx + 20 + k];
        w6[k] = tmps[gid].tmp[idx + 24 + k];
        w7[k] = tmps[gid].tmp[idx + 28 + k];
      }

      sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
    }
  }

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = h[k];
  }
}
|
||||
|
||||
// Final stage of mode 26900: closes the SHA-384 stream whose running state is
// stored in tmps[gid].h, hashes (digest || engineID || digest), uses the result
// as an HMAC-SHA384 key over esalt salt_buf and hands four 32-bit words of the
// HMAC output to the multi-hash comparison include.
KERNEL_FQ void m26900_comp (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * closing 128-byte block: 0x80 marker first, zero fill, length in the last word
   */

  u32 w0[4] = { 0x80000000, 0, 0, 0 };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };
  u32 w4[4] = { 0, 0, 0, 0 };
  u32 w5[4] = { 0, 0, 0, 0 };
  u32 w6[4] = { 0, 0, 0, 0 };

  // 1048576 * 8 = bit count of a 1048576-byte message; presumably the amount
  // the loop kernel hashed (confirm against m26900_loop)
  u32 w7[4] = { 0, 0, 0, 1048576 * 8 };

  /**
   * resume the running state and absorb the closing block
   */

  u64 h[8];

  for (int i = 0; i < 8; i++)
  {
    h[i] = tmps[gid].h[i];
  }

  sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);

  /**
   * sha384 (digest || engineID || digest), digest = first six state words (48 bytes)
   */

  sha384_ctx_t ctx;

  sha384_init (&ctx);

  u32 w[32];

  for (int i = 0; i < 6; i++)
  {
    w[i * 2 + 0] = h32_from_64_S (h[i]);
    w[i * 2 + 1] = l32_from_64_S (h[i]);
  }

  for (int i = 12; i < 32; i++)
  {
    w[i] = 0;
  }

  sha384_update (&ctx, w, 48);

  sha384_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // the buffer is rebuilt from scratch before the second use, exactly as before

  for (int i = 0; i < 6; i++)
  {
    w[i * 2 + 0] = h32_from_64_S (h[i]);
    w[i * 2 + 1] = l32_from_64_S (h[i]);
  }

  for (int i = 12; i < 32; i++)
  {
    w[i] = 0;
  }

  sha384_update (&ctx, w, 48);

  sha384_final (&ctx);

  /**
   * HMAC-SHA384 keyed with the 48-byte result, over the salt buffer
   */

  for (int i = 0; i < 6; i++)
  {
    w[i * 2 + 0] = h32_from_64_S (ctx.h[i]);
    w[i * 2 + 1] = l32_from_64_S (ctx.h[i]);
  }

  for (int i = 12; i < 32; i++)
  {
    w[i] = 0;
  }

  sha384_hmac_ctx_t hmac_ctx;

  sha384_hmac_init (&hmac_ctx, w, 48);

  sha384_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha384_hmac_final (&hmac_ctx);

  /**
   * digest words for the comparison include (names r0..r3, gid, il_pos are required by it)
   */

  const u32 r0 = l32_from_64 (hmac_ctx.opad.h[1]);
  const u32 r1 = h32_from_64 (hmac_ctx.opad.h[1]);
  const u32 r2 = l32_from_64 (hmac_ctx.opad.h[0]);
  const u32 r3 = h32_from_64 (hmac_ctx.opad.h[0]);

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,697 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp.h"
|
||||
#include "inc_rp.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_md4.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
|
||||
#define COMPARE_M "inc_comp_multi.cl"
|
||||
|
||||
// PERM_OP (a, b, tt, n, m): exchanges the bits of b selected by mask m with the
// bits of a located n positions higher (mask m << n). a and b are updated in
// place; tt is a caller-supplied scratch variable and is left holding the
// shifted difference mask.
//
// Every argument is parenthesized so expressions such as "x | y" can be passed
// safely, and the body is wrapped in do { } while (0) so that
// "if (c) PERM_OP (...); else ..." parses as a single statement.
#define PERM_OP(a,b,tt,n,m)     \
do                              \
{                               \
  (tt) = (a) >> (n);            \
  (tt) = (tt) ^ (b);            \
  (tt) = (tt) & (m);            \
  (b)  = (b) ^ (tt);            \
  (tt) = (tt) << (n);           \
  (a)  = (a) ^ (tt);            \
} while (0)
|
||||
|
||||
// HPERM_OP (a, tt, n, m): within the single word a, exchanges the bits selected
// by mask m with the bits located (16 + n) positions lower. a is updated in
// place; tt is a caller-supplied scratch variable and is left holding the
// down-shifted difference mask.
//
// Every argument is parenthesized so arbitrary expressions can be passed, and
// the body is wrapped in do { } while (0) so the macro behaves as one statement
// (safe in an unbraced if / else).
#define HPERM_OP(a,tt,n,m)      \
do                              \
{                               \
  (tt) = (a) << (16 + (n));     \
  (tt) = (tt) ^ (a);            \
  (tt) = (tt) & (m);            \
  (a)  = (a) ^ (tt);            \
  (tt) = (tt) >> (16 + (n));    \
  (a)  = (a) ^ (tt);            \
} while (0)
|
||||
|
||||
// Eight 64-entry lookup tables used by _des_crypt_encrypt: table n is indexed
// with a 6-bit value through BOX (idx, n, s_SPtrans). Values are listed in
// index order, eight per row.
CONSTANT_VK u32a c_SPtrans[8][64] =
{
  { // [0]
    0x02080800, 0x00080000, 0x02000002, 0x02080802, 0x02000000, 0x00080802, 0x00080002, 0x02000002,
    0x00080802, 0x02080800, 0x02080000, 0x00000802, 0x02000802, 0x02000000, 0x00000000, 0x00080002,
    0x00080000, 0x00000002, 0x02000800, 0x00080800, 0x02080802, 0x02080000, 0x00000802, 0x02000800,
    0x00000002, 0x00000800, 0x00080800, 0x02080002, 0x00000800, 0x02000802, 0x02080002, 0x00000000,
    0x00000000, 0x02080802, 0x02000800, 0x00080002, 0x02080800, 0x00080000, 0x00000802, 0x02000800,
    0x02080002, 0x00000800, 0x00080800, 0x02000002, 0x00080802, 0x00000002, 0x02000002, 0x02080000,
    0x02080802, 0x00080800, 0x02080000, 0x02000802, 0x02000000, 0x00000802, 0x00080002, 0x00000000,
    0x00080000, 0x02000000, 0x02000802, 0x02080800, 0x00000002, 0x02080002, 0x00000800, 0x00080802,
  },
  { // [1]
    0x40108010, 0x00000000, 0x00108000, 0x40100000, 0x40000010, 0x00008010, 0x40008000, 0x00108000,
    0x00008000, 0x40100010, 0x00000010, 0x40008000, 0x00100010, 0x40108000, 0x40100000, 0x00000010,
    0x00100000, 0x40008010, 0x40100010, 0x00008000, 0x00108010, 0x40000000, 0x00000000, 0x00100010,
    0x40008010, 0x00108010, 0x40108000, 0x40000010, 0x40000000, 0x00100000, 0x00008010, 0x40108010,
    0x00100010, 0x40108000, 0x40008000, 0x00108010, 0x40108010, 0x00100010, 0x40000010, 0x00000000,
    0x40000000, 0x00008010, 0x00100000, 0x40100010, 0x00008000, 0x40000000, 0x00108010, 0x40008010,
    0x40108000, 0x00008000, 0x00000000, 0x40000010, 0x00000010, 0x40108010, 0x00108000, 0x40100000,
    0x40100010, 0x00100000, 0x00008010, 0x40008000, 0x40008010, 0x00000010, 0x40100000, 0x00108000,
  },
  { // [2]
    0x04000001, 0x04040100, 0x00000100, 0x04000101, 0x00040001, 0x04000000, 0x04000101, 0x00040100,
    0x04000100, 0x00040000, 0x04040000, 0x00000001, 0x04040101, 0x00000101, 0x00000001, 0x04040001,
    0x00000000, 0x00040001, 0x04040100, 0x00000100, 0x00000101, 0x04040101, 0x00040000, 0x04000001,
    0x04040001, 0x04000100, 0x00040101, 0x04040000, 0x00040100, 0x00000000, 0x04000000, 0x00040101,
    0x04040100, 0x00000100, 0x00000001, 0x00040000, 0x00000101, 0x00040001, 0x04040000, 0x04000101,
    0x00000000, 0x04040100, 0x00040100, 0x04040001, 0x00040001, 0x04000000, 0x04040101, 0x00000001,
    0x00040101, 0x04000001, 0x04000000, 0x04040101, 0x00040000, 0x04000100, 0x04000101, 0x00040100,
    0x04000100, 0x00000000, 0x04040001, 0x00000101, 0x04000001, 0x00040101, 0x00000100, 0x04040000,
  },
  { // [3]
    0x00401008, 0x10001000, 0x00000008, 0x10401008, 0x00000000, 0x10400000, 0x10001008, 0x00400008,
    0x10401000, 0x10000008, 0x10000000, 0x00001008, 0x10000008, 0x00401008, 0x00400000, 0x10000000,
    0x10400008, 0x00401000, 0x00001000, 0x00000008, 0x00401000, 0x10001008, 0x10400000, 0x00001000,
    0x00001008, 0x00000000, 0x00400008, 0x10401000, 0x10001000, 0x10400008, 0x10401008, 0x00400000,
    0x10400008, 0x00001008, 0x00400000, 0x10000008, 0x00401000, 0x10001000, 0x00000008, 0x10400000,
    0x10001008, 0x00000000, 0x00001000, 0x00400008, 0x00000000, 0x10400008, 0x10401000, 0x00001000,
    0x10000000, 0x10401008, 0x00401008, 0x00400000, 0x10401008, 0x00000008, 0x10001000, 0x00401008,
    0x00400008, 0x00401000, 0x10400000, 0x10001008, 0x00001008, 0x10000000, 0x10000008, 0x10401000,
  },
  { // [4]
    0x08000000, 0x00010000, 0x00000400, 0x08010420, 0x08010020, 0x08000400, 0x00010420, 0x08010000,
    0x00010000, 0x00000020, 0x08000020, 0x00010400, 0x08000420, 0x08010020, 0x08010400, 0x00000000,
    0x00010400, 0x08000000, 0x00010020, 0x00000420, 0x08000400, 0x00010420, 0x00000000, 0x08000020,
    0x00000020, 0x08000420, 0x08010420, 0x00010020, 0x08010000, 0x00000400, 0x00000420, 0x08010400,
    0x08010400, 0x08000420, 0x00010020, 0x08010000, 0x00010000, 0x00000020, 0x08000020, 0x08000400,
    0x08000000, 0x00010400, 0x08010420, 0x00000000, 0x00010420, 0x08000000, 0x00000400, 0x00010020,
    0x08000420, 0x00000400, 0x00000000, 0x08010420, 0x08010020, 0x08010400, 0x00000420, 0x00010000,
    0x00010400, 0x08010020, 0x08000400, 0x00000420, 0x00000020, 0x00010420, 0x08010000, 0x08000020,
  },
  { // [5]
    0x80000040, 0x00200040, 0x00000000, 0x80202000, 0x00200040, 0x00002000, 0x80002040, 0x00200000,
    0x00002040, 0x80202040, 0x00202000, 0x80000000, 0x80002000, 0x80000040, 0x80200000, 0x00202040,
    0x00200000, 0x80002040, 0x80200040, 0x00000000, 0x00002000, 0x00000040, 0x80202000, 0x80200040,
    0x80202040, 0x80200000, 0x80000000, 0x00002040, 0x00000040, 0x00202000, 0x00202040, 0x80002000,
    0x00002040, 0x80000000, 0x80002000, 0x00202040, 0x80202000, 0x00200040, 0x00000000, 0x80002000,
    0x80000000, 0x00002000, 0x80200040, 0x00200000, 0x00200040, 0x80202040, 0x00202000, 0x00000040,
    0x80202040, 0x00202000, 0x00200000, 0x80002040, 0x80000040, 0x80200000, 0x00202040, 0x00000000,
    0x00002000, 0x80000040, 0x80002040, 0x80202000, 0x80200000, 0x00002040, 0x00000040, 0x80200040,
  },
  { // [6]
    0x00004000, 0x00000200, 0x01000200, 0x01000004, 0x01004204, 0x00004004, 0x00004200, 0x00000000,
    0x01000000, 0x01000204, 0x00000204, 0x01004000, 0x00000004, 0x01004200, 0x01004000, 0x00000204,
    0x01000204, 0x00004000, 0x00004004, 0x01004204, 0x00000000, 0x01000200, 0x01000004, 0x00004200,
    0x01004004, 0x00004204, 0x01004200, 0x00000004, 0x00004204, 0x01004004, 0x00000200, 0x01000000,
    0x00004204, 0x01004000, 0x01004004, 0x00000204, 0x00004000, 0x00000200, 0x01000000, 0x01004004,
    0x01000204, 0x00004204, 0x00004200, 0x00000000, 0x00000200, 0x01000004, 0x00000004, 0x01000200,
    0x00000000, 0x01000204, 0x01000200, 0x00004200, 0x00000204, 0x00004000, 0x01004204, 0x01000000,
    0x01004200, 0x00000004, 0x00004004, 0x01004204, 0x01000004, 0x01004200, 0x01004000, 0x00004004,
  },
  { // [7]
    0x20800080, 0x20820000, 0x00020080, 0x00000000, 0x20020000, 0x00800080, 0x20800000, 0x20820080,
    0x00000080, 0x20000000, 0x00820000, 0x00020080, 0x00820080, 0x20020080, 0x20000080, 0x20800000,
    0x00020000, 0x00820080, 0x00800080, 0x20020000, 0x20820080, 0x20000080, 0x00000000, 0x00820000,
    0x20000000, 0x00800000, 0x20020080, 0x20800080, 0x00800000, 0x00020000, 0x20820000, 0x00000080,
    0x00800000, 0x00020000, 0x20000080, 0x20820080, 0x00020080, 0x20000000, 0x00000000, 0x00820000,
    0x20800080, 0x20020080, 0x20020000, 0x00800080, 0x20820000, 0x00000080, 0x00800080, 0x20020000,
    0x20820080, 0x00800000, 0x20800000, 0x20000080, 0x00820000, 0x00020080, 0x20020080, 0x20800000,
    0x00000080, 0x20820000, 0x00820080, 0x00000000, 0x20000000, 0x20800080, 0x00020000, 0x00820080,
  }
};
|
||||
|
||||
// Eight 64-entry lookup tables used by _des_crypt_keysetup: tables 0-3 are
// indexed with values derived from the c half, tables 4-7 with values derived
// from the d half, through BOX (idx, n, s_skb). Values are listed in index
// order, eight per row.
CONSTANT_VK u32a c_skb[8][64] =
{
  { // [0]
    0x00000000, 0x00000010, 0x20000000, 0x20000010, 0x00010000, 0x00010010, 0x20010000, 0x20010010,
    0x00000800, 0x00000810, 0x20000800, 0x20000810, 0x00010800, 0x00010810, 0x20010800, 0x20010810,
    0x00000020, 0x00000030, 0x20000020, 0x20000030, 0x00010020, 0x00010030, 0x20010020, 0x20010030,
    0x00000820, 0x00000830, 0x20000820, 0x20000830, 0x00010820, 0x00010830, 0x20010820, 0x20010830,
    0x00080000, 0x00080010, 0x20080000, 0x20080010, 0x00090000, 0x00090010, 0x20090000, 0x20090010,
    0x00080800, 0x00080810, 0x20080800, 0x20080810, 0x00090800, 0x00090810, 0x20090800, 0x20090810,
    0x00080020, 0x00080030, 0x20080020, 0x20080030, 0x00090020, 0x00090030, 0x20090020, 0x20090030,
    0x00080820, 0x00080830, 0x20080820, 0x20080830, 0x00090820, 0x00090830, 0x20090820, 0x20090830,
  },
  { // [1]
    0x00000000, 0x02000000, 0x00002000, 0x02002000, 0x00200000, 0x02200000, 0x00202000, 0x02202000,
    0x00000004, 0x02000004, 0x00002004, 0x02002004, 0x00200004, 0x02200004, 0x00202004, 0x02202004,
    0x00000400, 0x02000400, 0x00002400, 0x02002400, 0x00200400, 0x02200400, 0x00202400, 0x02202400,
    0x00000404, 0x02000404, 0x00002404, 0x02002404, 0x00200404, 0x02200404, 0x00202404, 0x02202404,
    0x10000000, 0x12000000, 0x10002000, 0x12002000, 0x10200000, 0x12200000, 0x10202000, 0x12202000,
    0x10000004, 0x12000004, 0x10002004, 0x12002004, 0x10200004, 0x12200004, 0x10202004, 0x12202004,
    0x10000400, 0x12000400, 0x10002400, 0x12002400, 0x10200400, 0x12200400, 0x10202400, 0x12202400,
    0x10000404, 0x12000404, 0x10002404, 0x12002404, 0x10200404, 0x12200404, 0x10202404, 0x12202404,
  },
  { // [2]
    0x00000000, 0x00000001, 0x00040000, 0x00040001, 0x01000000, 0x01000001, 0x01040000, 0x01040001,
    0x00000002, 0x00000003, 0x00040002, 0x00040003, 0x01000002, 0x01000003, 0x01040002, 0x01040003,
    0x00000200, 0x00000201, 0x00040200, 0x00040201, 0x01000200, 0x01000201, 0x01040200, 0x01040201,
    0x00000202, 0x00000203, 0x00040202, 0x00040203, 0x01000202, 0x01000203, 0x01040202, 0x01040203,
    0x08000000, 0x08000001, 0x08040000, 0x08040001, 0x09000000, 0x09000001, 0x09040000, 0x09040001,
    0x08000002, 0x08000003, 0x08040002, 0x08040003, 0x09000002, 0x09000003, 0x09040002, 0x09040003,
    0x08000200, 0x08000201, 0x08040200, 0x08040201, 0x09000200, 0x09000201, 0x09040200, 0x09040201,
    0x08000202, 0x08000203, 0x08040202, 0x08040203, 0x09000202, 0x09000203, 0x09040202, 0x09040203,
  },
  { // [3]
    0x00000000, 0x00100000, 0x00000100, 0x00100100, 0x00000008, 0x00100008, 0x00000108, 0x00100108,
    0x00001000, 0x00101000, 0x00001100, 0x00101100, 0x00001008, 0x00101008, 0x00001108, 0x00101108,
    0x04000000, 0x04100000, 0x04000100, 0x04100100, 0x04000008, 0x04100008, 0x04000108, 0x04100108,
    0x04001000, 0x04101000, 0x04001100, 0x04101100, 0x04001008, 0x04101008, 0x04001108, 0x04101108,
    0x00020000, 0x00120000, 0x00020100, 0x00120100, 0x00020008, 0x00120008, 0x00020108, 0x00120108,
    0x00021000, 0x00121000, 0x00021100, 0x00121100, 0x00021008, 0x00121008, 0x00021108, 0x00121108,
    0x04020000, 0x04120000, 0x04020100, 0x04120100, 0x04020008, 0x04120008, 0x04020108, 0x04120108,
    0x04021000, 0x04121000, 0x04021100, 0x04121100, 0x04021008, 0x04121008, 0x04021108, 0x04121108,
  },
  { // [4]
    0x00000000, 0x10000000, 0x00010000, 0x10010000, 0x00000004, 0x10000004, 0x00010004, 0x10010004,
    0x20000000, 0x30000000, 0x20010000, 0x30010000, 0x20000004, 0x30000004, 0x20010004, 0x30010004,
    0x00100000, 0x10100000, 0x00110000, 0x10110000, 0x00100004, 0x10100004, 0x00110004, 0x10110004,
    0x20100000, 0x30100000, 0x20110000, 0x30110000, 0x20100004, 0x30100004, 0x20110004, 0x30110004,
    0x00001000, 0x10001000, 0x00011000, 0x10011000, 0x00001004, 0x10001004, 0x00011004, 0x10011004,
    0x20001000, 0x30001000, 0x20011000, 0x30011000, 0x20001004, 0x30001004, 0x20011004, 0x30011004,
    0x00101000, 0x10101000, 0x00111000, 0x10111000, 0x00101004, 0x10101004, 0x00111004, 0x10111004,
    0x20101000, 0x30101000, 0x20111000, 0x30111000, 0x20101004, 0x30101004, 0x20111004, 0x30111004,
  },
  { // [5]
    0x00000000, 0x08000000, 0x00000008, 0x08000008, 0x00000400, 0x08000400, 0x00000408, 0x08000408,
    0x00020000, 0x08020000, 0x00020008, 0x08020008, 0x00020400, 0x08020400, 0x00020408, 0x08020408,
    0x00000001, 0x08000001, 0x00000009, 0x08000009, 0x00000401, 0x08000401, 0x00000409, 0x08000409,
    0x00020001, 0x08020001, 0x00020009, 0x08020009, 0x00020401, 0x08020401, 0x00020409, 0x08020409,
    0x02000000, 0x0A000000, 0x02000008, 0x0A000008, 0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
    0x02020000, 0x0A020000, 0x02020008, 0x0A020008, 0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
    0x02000001, 0x0A000001, 0x02000009, 0x0A000009, 0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
    0x02020001, 0x0A020001, 0x02020009, 0x0A020009, 0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
  },
  { // [6]
    0x00000000, 0x00000100, 0x00080000, 0x00080100, 0x01000000, 0x01000100, 0x01080000, 0x01080100,
    0x00000010, 0x00000110, 0x00080010, 0x00080110, 0x01000010, 0x01000110, 0x01080010, 0x01080110,
    0x00200000, 0x00200100, 0x00280000, 0x00280100, 0x01200000, 0x01200100, 0x01280000, 0x01280100,
    0x00200010, 0x00200110, 0x00280010, 0x00280110, 0x01200010, 0x01200110, 0x01280010, 0x01280110,
    0x00000200, 0x00000300, 0x00080200, 0x00080300, 0x01000200, 0x01000300, 0x01080200, 0x01080300,
    0x00000210, 0x00000310, 0x00080210, 0x00080310, 0x01000210, 0x01000310, 0x01080210, 0x01080310,
    0x00200200, 0x00200300, 0x00280200, 0x00280300, 0x01200200, 0x01200300, 0x01280200, 0x01280300,
    0x00200210, 0x00200310, 0x00280210, 0x00280310, 0x01200210, 0x01200310, 0x01280210, 0x01280310,
  },
  { // [7]
    0x00000000, 0x04000000, 0x00040000, 0x04040000, 0x00000002, 0x04000002, 0x00040002, 0x04040002,
    0x00002000, 0x04002000, 0x00042000, 0x04042000, 0x00002002, 0x04002002, 0x00042002, 0x04042002,
    0x00000020, 0x04000020, 0x00040020, 0x04040020, 0x00000022, 0x04000022, 0x00040022, 0x04040022,
    0x00002020, 0x04002020, 0x00042020, 0x04042020, 0x00002022, 0x04002022, 0x00042022, 0x04042022,
    0x00000800, 0x04000800, 0x00040800, 0x04040800, 0x00000802, 0x04000802, 0x00040802, 0x04040802,
    0x00002800, 0x04002800, 0x00042800, 0x04042800, 0x00002802, 0x04002802, 0x00042802, 0x04042802,
    0x00000820, 0x04000820, 0x00040820, 0x04040820, 0x00000822, 0x04000822, 0x00040822, 0x04040822,
    0x00002820, 0x04002820, 0x00042820, 0x04042820, 0x00002822, 0x04002822, 0x00042822, 0x04042822
  }
};
|
||||
|
||||
// BOX (idx, box, tbl): reads tbl[box][idx]. For VECT_SIZE > 1 the lookup is done
// once per lane of idx (.s0, .s1, ...) and the results are packed with make_u32x.
#if   VECT_SIZE == 1
#define BOX(idx,box,tbl) (tbl)[(box)][(idx)]
#elif VECT_SIZE == 2
#define BOX(idx,box,tbl) make_u32x ((tbl)[(box)][(idx).s0], (tbl)[(box)][(idx).s1])
#elif VECT_SIZE == 4
#define BOX(idx,box,tbl) make_u32x ((tbl)[(box)][(idx).s0], (tbl)[(box)][(idx).s1], \
                                    (tbl)[(box)][(idx).s2], (tbl)[(box)][(idx).s3])
#elif VECT_SIZE == 8
#define BOX(idx,box,tbl) make_u32x ((tbl)[(box)][(idx).s0], (tbl)[(box)][(idx).s1], \
                                    (tbl)[(box)][(idx).s2], (tbl)[(box)][(idx).s3], \
                                    (tbl)[(box)][(idx).s4], (tbl)[(box)][(idx).s5], \
                                    (tbl)[(box)][(idx).s6], (tbl)[(box)][(idx).s7])
#elif VECT_SIZE == 16
#define BOX(idx,box,tbl) make_u32x ((tbl)[(box)][(idx).s0], (tbl)[(box)][(idx).s1], \
                                    (tbl)[(box)][(idx).s2], (tbl)[(box)][(idx).s3], \
                                    (tbl)[(box)][(idx).s4], (tbl)[(box)][(idx).s5], \
                                    (tbl)[(box)][(idx).s6], (tbl)[(box)][(idx).s7], \
                                    (tbl)[(box)][(idx).s8], (tbl)[(box)][(idx).s9], \
                                    (tbl)[(box)][(idx).sa], (tbl)[(box)][(idx).sb], \
                                    (tbl)[(box)][(idx).sc], (tbl)[(box)][(idx).sd], \
                                    (tbl)[(box)][(idx).se], (tbl)[(box)][(idx).sf])
#endif
|
||||
|
||||
// Runs 16 table-driven rounds (two per loop iteration) over the 64-bit block in
// data[0..1], using the per-round key words Kc[i] / Kd[i] and the eight lookup
// tables in s_SPtrans. The result is written to iv[0] (left half) and iv[1]
// (right half); data is not modified. No permutation other than the table
// lookups is applied in this function.
DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_SPtrans)[64])
{
  u32 right = data[0];
  u32 left  = data[1];

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 rnd = 0; rnd < 16; rnd += 2)
  {
    // even round: mix the right half into the left half

    const u32 ue = Kc[rnd + 0] ^ hc_rotl32 (right, 30u);
    const u32 te = Kd[rnd + 0] ^ hc_rotl32 (right, 26u);

    u32 fe = BOX (((ue >>  0) & 0x3f), 0, s_SPtrans);

    fe |= BOX (((ue >>  8) & 0x3f), 2, s_SPtrans);
    fe |= BOX (((ue >> 16) & 0x3f), 4, s_SPtrans);
    fe |= BOX (((ue >> 24) & 0x3f), 6, s_SPtrans);
    fe |= BOX (((te >>  0) & 0x3f), 1, s_SPtrans);
    fe |= BOX (((te >>  8) & 0x3f), 3, s_SPtrans);
    fe |= BOX (((te >> 16) & 0x3f), 5, s_SPtrans);
    fe |= BOX (((te >> 24) & 0x3f), 7, s_SPtrans);

    left ^= fe;

    // odd round: mix the updated left half into the right half

    const u32 uo = Kc[rnd + 1] ^ hc_rotl32 (left, 30u);
    const u32 to = Kd[rnd + 1] ^ hc_rotl32 (left, 26u);

    u32 fo = BOX (((uo >>  0) & 0x3f), 0, s_SPtrans);

    fo |= BOX (((uo >>  8) & 0x3f), 2, s_SPtrans);
    fo |= BOX (((uo >> 16) & 0x3f), 4, s_SPtrans);
    fo |= BOX (((uo >> 24) & 0x3f), 6, s_SPtrans);
    fo |= BOX (((to >>  0) & 0x3f), 1, s_SPtrans);
    fo |= BOX (((to >>  8) & 0x3f), 3, s_SPtrans);
    fo |= BOX (((to >> 16) & 0x3f), 5, s_SPtrans);
    fo |= BOX (((to >> 24) & 0x3f), 7, s_SPtrans);

    right ^= fo;
  }

  iv[0] = left;
  iv[1] = right;
}
|
||||
|
||||
// Expands the two key words c and d into 16 pairs of round-key words.
// The inputs are first shuffled with PERM_OP / HPERM_OP and reduced to two
// 28-bit halves; each of the 16 iterations rotates both halves right by one or
// two bits (28-bit rotation), looks the halves up in the s_skb tables and
// stores the combined result in Kc[i] and Kd[i].
DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_skb)[64])
{
  u32 scratch;

  PERM_OP  (d, c, scratch, 4, 0x0f0f0f0f);
  HPERM_OP (c,    scratch, 2, 0xcccc0000);
  HPERM_OP (d,    scratch, 2, 0xcccc0000);
  PERM_OP  (d, c, scratch, 1, 0x55555555);
  PERM_OP  (c, d, scratch, 8, 0x00ff00ff);
  PERM_OP  (d, c, scratch, 1, 0x55555555);

  // d: swap byte 0 and byte 2, keep byte 1, take the top nibble of c as bits 24..27

  const u32 d_byte0_up   = (d & 0x000000ff) << 16;
  const u32 d_byte1_keep = (d & 0x0000ff00) <<  0;
  const u32 d_byte2_down = (d & 0x00ff0000) >> 16;
  const u32 c_top_nibble = (c & 0xf0000000) >>  4;

  d = d_byte0_up | d_byte1_keep | d_byte2_down | c_top_nibble;

  c = c & 0x0fffffff;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 i = 0; i < 16; i++)
  {
    // rounds 0, 1, 8 and 15 rotate by one bit, all others by two

    const u32 rot = ((i < 2) || (i == 8) || (i == 15)) ? 1 : 2;

    c = ((c >> rot) | (c << (28 - rot))) & 0x0fffffff;
    d = ((d >> rot) | (d << (28 - rot))) & 0x0fffffff;

    // table indices for the c half (tables 0..3)

    const u32 c00 = (c >>  0) & 0x0000003f;
    const u32 c06 = (c >>  6) & 0x00383003;
    const u32 c07 = (c >>  7) & 0x0000003c;
    const u32 c13 = (c >> 13) & 0x0000060f;
    const u32 c20 = (c >> 20) & 0x00000001;

    const u32 ci0 = ((c00 >>  0) & 0xff);
    const u32 ci1 = ((c06 >>  0) & 0xff) | ((c07 >>  0) & 0xff);
    const u32 ci2 = ((c13 >>  0) & 0xff) | ((c06 >>  8) & 0xff);
    const u32 ci3 = ((c20 >>  0) & 0xff) | ((c13 >>  8) & 0xff) | ((c06 >> 16) & 0xff);

    const u32 s = BOX (ci0, 0, s_skb)
                | BOX (ci1, 1, s_skb)
                | BOX (ci2, 2, s_skb)
                | BOX (ci3, 3, s_skb);

    // table indices for the d half (tables 4..7)

    const u32 d00 = (d >>  0) & 0x00003c3f;
    const u32 d07 = (d >>  7) & 0x00003f03;
    const u32 d21 = (d >> 21) & 0x0000000f;
    const u32 d22 = (d >> 22) & 0x00000030;

    const u32 di0 = ((d00 >>  0) & 0xff);
    const u32 di1 = ((d07 >>  0) & 0xff) | ((d00 >>  8) & 0xff);
    const u32 di2 = ((d07 >>  8) & 0xff);
    const u32 di3 = ((d21 >>  0) & 0xff) | ((d22 >>  0) & 0xff);

    const u32 t = BOX (di0, 4, s_skb)
                | BOX (di1, 5, s_skb)
                | BOX (di2, 6, s_skb)
                | BOX (di3, 7, s_skb);

    // low halves of s and t go to Kc, high halves to Kd

    Kc[i] = ((t << 16) | (s & 0x0000ffff));
    Kd[i] = ((s >> 16) | (t & 0xffff0000));
  }
}
|
||||
|
||||
// Spreads seven input bytes (all of w0 plus the low three bytes of w1) over
// eight output bytes written to out[0..1]. Output byte j combines the low bits
// of input byte j-1 (shifted up by 8-j) with the high bits of input byte j
// (shifted down by j); the top byte of w1 does not influence the result.
DECLSPEC void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 *out)
{
  const u32 b0 = (w0 >>  0) & 0xff;
  const u32 b1 = (w0 >>  8) & 0xff;
  const u32 b2 = (w0 >> 16) & 0xff;
  const u32 b3 = (w0 >> 24) & 0xff;
  const u32 b4 = (w1 >>  0) & 0xff;
  const u32 b5 = (w1 >>  8) & 0xff;
  const u32 b6 = (w1 >> 16) & 0xff;

  const u32 k0 =             (b0 >> 0);
  const u32 k1 = (b0 << 7) | (b1 >> 1);
  const u32 k2 = (b1 << 6) | (b2 >> 2);
  const u32 k3 = (b2 << 5) | (b3 >> 3);
  const u32 k4 = (b3 << 4) | (b4 >> 4);
  const u32 k5 = (b4 << 3) | (b5 >> 5);
  const u32 k6 = (b5 << 2) | (b6 >> 6);
  const u32 k7 = (b6 << 1);

  // only the low eight bits of each k value are kept

  out[0] = ((k0 & 0xff) <<  0)
         | ((k1 & 0xff) <<  8)
         | ((k2 & 0xff) << 16)
         | ((k3 & 0xff) << 24);

  out[1] = ((k4 & 0xff) <<  0)
         | ((k5 & 0xff) <<  8)
         | ((k6 & 0xff) << 16)
         | ((k7 & 0xff) << 24);
}
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
// Maps one ASCII hex digit to its value: the low nibble of the character, plus
// 9 for every unit of (c >> 6). For '0'-'9' the addend is 0; for 'A'-'F' and
// 'a'-'f' it is 9. The input is not validated.
DECLSPEC u8 hex_convert (const u8 c)
{
  const u8 low_nibble  = c & 15;
  const u8 letter_bias = (c >> 6) * 9;

  return low_nibble + letter_bias;
}
|
||||
|
||||
// Decodes the two hex characters hex[0] (high nibble) and hex[1] (low nibble)
// into one byte.
DECLSPEC u8 hex_to_u8 (const u8 *hex)
{
  const u8 hi = hex_convert (hex[0]);
  const u8 lo = hex_convert (hex[1]);

  return (u8) ((hi << 4) | (lo << 0));
}
|
||||
#endif
|
||||
|
||||
typedef struct netntlm
|
||||
{
|
||||
u32 user_len;
|
||||
u32 domain_len;
|
||||
u32 srvchall_len;
|
||||
u32 clichall_len;
|
||||
|
||||
u32 userdomain_buf[64];
|
||||
u32 chall_buf[256];
|
||||
|
||||
} netntlm_t;
|
||||
|
||||
typedef struct netntlm_tmp
|
||||
{
|
||||
u32 digest_buf[4];
|
||||
|
||||
} netntlm_tmp_t;
|
||||
|
||||
// Init kernel of mode 27000: treats the first 32 bytes of the password
// candidate as 32 hex characters, decodes them to 16 raw bytes and stores these
// as four words in tmps[gid].digest_buf. The candidate length is not checked
// and the characters are not validated here.
KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * candidate: only the first eight words (32 hex characters) are consumed,
   * so the local copy holds exactly those eight words
   */

  u32 in[8];

  for (int i = 0; i < 8; i++)
  {
    in[i] = pws[gid].i[i];
  }

  const u8 *in_ptr = (const u8 *) in;

  /**
   * hex decode: two input characters per output byte
   */

  u32 out[4];

  u8 *out_ptr = (u8 *) out;

  for (int i = 0, j = 0; i < 16; i += 1, j += 2)
  {
    out_ptr[i] = hex_to_u8 (in_ptr + j);
  }

  for (int i = 0; i < 4; i++)
  {
    tmps[gid].digest_buf[i] = out[i];
  }
}
|
||||
|
||||
// Loop kernel of mode 27000: does no work. All processing happens in
// m27000_init and m27000_comp; the entry point is presumably kept because the
// host side expects an init / loop / comp kernel set (confirm in the module).
KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
  // intentionally empty
}
|
||||
|
||||
// Comp kernel of mode 27000: takes the 16 bytes stored by m27000_init, derives
// two keys from them, encrypts the two salt words salt_buf[0..1] under each key
// with the table-driven DES routines and passes the four result words to the
// multi-hash comparison include.
KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * sbox, kbox
   *
   * with REAL_SHM every work-item copies its share of the tables into local
   * memory, so the bounds check on gid has to stay below SYNC_THREADS ()
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_SPtrans[8][64];
  LOCAL_VK u32 s_skb[8][64];

  for (u32 i = lid; i < 64; i += lsz)
  {
    for (u32 n = 0; n < 8; n++)
    {
      s_SPtrans[n][i] = c_SPtrans[n][i];
    }

    for (u32 n = 0; n < 8; n++)
    {
      s_skb[n][i] = c_skb[n][i];
    }
  }

  SYNC_THREADS ();

  #else

  CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans;
  CONSTANT_AS u32a (*s_skb)[64]     = c_skb;

  #endif

  if (gid >= gid_max) return;

  /**
   * base
   */

  const u32 s0 = salt_bufs[SALT_POS].salt_buf[0];
  const u32 s1 = salt_bufs[SALT_POS].salt_buf[1];
  const u32 s2 = salt_bufs[SALT_POS].salt_buf[2];

  const u32 nt0 = tmps[gid].digest_buf[0];
  const u32 nt1 = tmps[gid].digest_buf[1];
  const u32 nt2 = tmps[gid].digest_buf[2];
  const u32 nt3 = tmps[gid].digest_buf[3];

  // Early reject: the upper 16 bits of the last candidate word must equal
  // salt_buf[2]. The original author notes this was taken from mode 5500 and
  // believes it matches the last two bytes; not verified here.

  if ((nt3 >> 16) != s2) return;

  /**
   * DES1: key from candidate bytes 0..6
   */

  u32 key[2];

  transform_netntlmv1_key (nt0, nt1, key);

  u32 Kc[16];
  u32 Kd[16];

  _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);

  u32 data[2] = { s0, s1 };

  u32 out1[2];

  _des_crypt_encrypt (out1, data, Kc, Kd, s_SPtrans);

  /**
   * DES2: key from candidate bytes 7..13 (word stream shifted by 24 bits)
   */

  const u32 k2_lo = (nt1 >> 24) | (nt2 << 8);
  const u32 k2_hi = (nt2 >> 24) | (nt3 << 8);

  transform_netntlmv1_key (k2_lo, k2_hi, key);

  _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);

  u32 out2[2];

  _des_crypt_encrypt (out2, data, Kc, Kd, s_SPtrans);

  /**
   * digest (names r0..r3, gid and il_pos are required by the comparison include)
   */

  const u32 r0 = out1[0];
  const u32 r1 = out1[1];
  const u32 r2 = out2[0];
  const u32 r3 = out2[1];

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,197 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
// #define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp.h"
|
||||
#include "inc_rp.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_md4.cl"
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
|
||||
#define COMPARE_M "inc_comp_multi.cl"
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
// Maps one ASCII hex digit to its value: the low nibble of the character, plus
// 9 for every unit of (c >> 6). For '0'-'9' the addend is 0; for 'A'-'F' and
// 'a'-'f' it is 9. The input is not validated.
DECLSPEC u8 hex_convert (const u8 c)
{
  const u8 low_nibble  = c & 15;
  const u8 letter_bias = (c >> 6) * 9;

  return low_nibble + letter_bias;
}
|
||||
|
||||
// Decodes the two hex characters hex[0] (high nibble) and hex[1] (low nibble)
// into one byte.
DECLSPEC u8 hex_to_u8 (const u8 *hex)
{
  const u8 hi = hex_convert (hex[0]);
  const u8 lo = hex_convert (hex[1]);

  return (u8) ((hi << 4) | (lo << 0));
}
|
||||
#endif
|
||||
|
||||
typedef struct netntlm
|
||||
{
|
||||
u32 user_len;
|
||||
u32 domain_len;
|
||||
u32 srvchall_len;
|
||||
u32 clichall_len;
|
||||
|
||||
u32 userdomain_buf[64];
|
||||
u32 chall_buf[256];
|
||||
|
||||
} netntlm_t;
|
||||
|
||||
typedef struct netntlmv2_tmp
|
||||
{
|
||||
u32 digest_buf[4];
|
||||
|
||||
} netntlm_tmp_t;
|
||||
|
||||
|
||||
// Init kernel of mode 27100: treats the first 32 bytes of the password
// candidate as 32 hex characters, decodes them to 16 raw bytes and stores these
// as four words in tmps[gid].digest_buf. The candidate length is not checked
// and the characters are not validated here.
KERNEL_FQ void m27100_init (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base: only the first eight words (32 hex characters) are consumed,
   * so the local copy holds exactly those eight words
   */

  u32 in[8];

  for (int i = 0; i < 8; i++)
  {
    in[i] = pws[gid].i[i];
  }

  const u8 *in_ptr = (const u8 *) in;

  /**
   * hex decode: two input characters per output byte
   */

  u32 out[4];

  u8 *out_ptr = (u8 *) out;

  for (int i = 0, j = 0; i < 16; i += 1, j += 2)
  {
    out_ptr[i] = hex_to_u8 (in_ptr + j);
  }

  for (int i = 0; i < 4; i++)
  {
    tmps[gid].digest_buf[i] = out[i];
  }
}
|
||||
|
||||
|
||||
// Loop kernel with an empty body: no iterated work is done here. The HMAC
// computation for this mode is performed in m27100_comp.
KERNEL_FQ void m27100_loop (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
|
||||
{
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
 * Comp kernel: two chained HMAC-MD5 computations followed by the compare.
 *
 *   stage 1: key = 16 bytes stored by m27100_init (zero-extended to 64 bytes),
 *            msg = esalt userdomain_buf, length user_len + domain_len
 *   stage 2: key = stage 1 result (zero-extended to 64 bytes),
 *            msg = esalt chall_buf, length srvchall_len + clichall_len
 *
 * The stage 2 result is written back to tmps[gid].digest_buf and compared
 * through the COMPARE_M include.
 */
KERNEL_FQ void m27100_comp (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u64 lid = get_local_id (0);

  const u32 identity_len  = esalt_bufs[DIGESTS_OFFSET].user_len     + esalt_bufs[DIGESTS_OFFSET].domain_len;
  const u32 challenge_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len;

  /**
   * stage 1: keyed with the value prepared by the init kernel
   */

  u32 k0[4] =
  {
    tmps[gid].digest_buf[0],
    tmps[gid].digest_buf[1],
    tmps[gid].digest_buf[2],
    tmps[gid].digest_buf[3]
  };

  u32 k1[4] = { 0 };
  u32 k2[4] = { 0 };
  u32 k3[4] = { 0 };

  md5_hmac_ctx_t ctx0;

  md5_hmac_init_64 (&ctx0, k0, k1, k2, k3);

  md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, identity_len);

  md5_hmac_final (&ctx0);

  /**
   * stage 2: keyed with the stage 1 result
   */

  u32 w0[4] =
  {
    ctx0.opad.h[0],
    ctx0.opad.h[1],
    ctx0.opad.h[2],
    ctx0.opad.h[3]
  };

  u32 w1[4] = { 0 };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0 };

  md5_hmac_ctx_t ctx;

  md5_hmac_init_64 (&ctx, w0, w1, w2, w3);

  md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, challenge_len);

  md5_hmac_final (&ctx);

  // keep the final value in the per-candidate scratch buffer as well
  tmps[gid].digest_buf[0] = ctx.opad.h[0];
  tmps[gid].digest_buf[1] = ctx.opad.h[1];
  tmps[gid].digest_buf[2] = ctx.opad.h[2];
  tmps[gid].digest_buf[3] = ctx.opad.h[3];

  /**
   * digest
   */

  const u32 r0 = ctx.opad.h[DGST_R0];
  const u32 r1 = ctx.opad.h[DGST_R1];
  const u32 r2 = ctx.opad.h[DGST_R2];
  const u32 r3 = ctx.opad.h[DGST_R3];

  // the compare include expects an il_pos; this kernel has a single candidate per work-item
  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,513 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
/**
 * Multi-hash kernel, rule-based attack, optimized (single SHA-1 block) path.
 *
 * For each rule the base word is mutated, then one SHA-1 compression is run
 * over
 *
 *   "--" . salt (10 words = 40 bytes) . "--" . word . "--"
 *
 * and the state words are handed to COMPARE_M_SIMD.
 */
KERNEL_FQ void m27200_m04 (KERN_ATTR_RULES ())
{
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base word
   */

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * fixed prefix: 0x2d 0x2d, the 40 salt bytes shifted up by two bytes,
   * 0x2d 0x2d. Every word after prefix[10] would be zero and is therefore
   * not materialized.
   */

  const u32 s0 = salt_bufs[SALT_POS].salt_buf[0];
  const u32 s1 = salt_bufs[SALT_POS].salt_buf[1];
  const u32 s2 = salt_bufs[SALT_POS].salt_buf[2];
  const u32 s3 = salt_bufs[SALT_POS].salt_buf[3];
  const u32 s4 = salt_bufs[SALT_POS].salt_buf[4];
  const u32 s5 = salt_bufs[SALT_POS].salt_buf[5];
  const u32 s6 = salt_bufs[SALT_POS].salt_buf[6];
  const u32 s7 = salt_bufs[SALT_POS].salt_buf[7];
  const u32 s8 = salt_bufs[SALT_POS].salt_buf[8];
  const u32 s9 = salt_bufs[SALT_POS].salt_buf[9];

  u32 prefix[11];

  prefix[ 0] = 0x2d2d     | (s0 << 16);
  prefix[ 1] = (s0 >> 16) | (s1 << 16);
  prefix[ 2] = (s1 >> 16) | (s2 << 16);
  prefix[ 3] = (s2 >> 16) | (s3 << 16);
  prefix[ 4] = (s3 >> 16) | (s4 << 16);
  prefix[ 5] = (s4 >> 16) | (s5 << 16);
  prefix[ 6] = (s5 >> 16) | (s6 << 16);
  prefix[ 7] = (s6 >> 16) | (s7 << 16);
  prefix[ 8] = (s7 >> 16) | (s8 << 16);
  prefix[ 9] = (s8 >> 16) | (s9 << 16);
  prefix[10] = (s9 >> 16) | 0x2d2d0000;

  const u32 salt_len = 44; // 2 + 40 + 2, not salt_bufs[SALT_POS].salt_len

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * loop over rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x word_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    // length of prefix + mutated word
    const u32x msg_len = word_len + salt_len;

    // make room for the prefix in front of the word, then merge it in
    switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);

    w0[0] |= prefix[ 0];
    w0[1] |= prefix[ 1];
    w0[2] |= prefix[ 2];
    w0[3] |= prefix[ 3];
    w1[0] |= prefix[ 4];
    w1[1] |= prefix[ 5];
    w1[2] |= prefix[ 6];
    w1[3] |= prefix[ 7];
    w2[0] |= prefix[ 8];
    w2[1] |= prefix[ 9];
    w2[2] |= prefix[10];

    // trailing 0x2d 0x2d, then the 0x80 padding byte
    append_0x2d_4x4_VV (w0, w1, w2, w3, msg_len);
    append_0x2d_4x4_VV (w0, w1, w2, w3, msg_len + 1);
    append_0x80_4x4_VV (w0, w1, w2, w3, msg_len + 2);

    /**
     * sha1 (80 steps, order-critical)
     */

    u32x w0_t = hc_swap32 (w0[0]);
    u32x w1_t = hc_swap32 (w0[1]);
    u32x w2_t = hc_swap32 (w0[2]);
    u32x w3_t = hc_swap32 (w0[3]);
    u32x w4_t = hc_swap32 (w1[0]);
    u32x w5_t = hc_swap32 (w1[1]);
    u32x w6_t = hc_swap32 (w1[2]);
    u32x w7_t = hc_swap32 (w1[3]);
    u32x w8_t = hc_swap32 (w2[0]);
    u32x w9_t = hc_swap32 (w2[1]);
    u32x wa_t = hc_swap32 (w2[2]);
    u32x wb_t = hc_swap32 (w2[3]);
    u32x wc_t = hc_swap32 (w3[0]);
    u32x wd_t = hc_swap32 (w3[1]);
    u32x we_t = 0;
    u32x wf_t = (msg_len + 2) * 8; // message length in bits

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    COMPARE_M_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
// Empty stub: only the _m04 multi-hash variant of this kernel file has an implementation.
KERNEL_FQ void m27200_m08 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
// Empty stub: only the _m04 multi-hash variant of this kernel file has an implementation.
KERNEL_FQ void m27200_m16 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Single-hash kernel, rule-based attack, optimized (single SHA-1 block) path.
 *
 * For each rule the base word is mutated, then one SHA-1 compression is run
 * over
 *
 *   "--" . salt (10 words = 40 bytes) . "--" . word . "--"
 *
 * After step 76 the candidate is dropped early when MATCHES_NONE_VS reports
 * no match against the pre-rotated target word; otherwise the last four
 * steps run and the state words are handed to COMPARE_S_SIMD.
 */
KERNEL_FQ void m27200_s04 (KERN_ATTR_RULES ())
{
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base word
   */

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * fixed prefix: 0x2d 0x2d, the 40 salt bytes shifted up by two bytes,
   * 0x2d 0x2d. Every word after prefix[10] would be zero and is therefore
   * not materialized.
   */

  const u32 s0 = salt_bufs[SALT_POS].salt_buf[0];
  const u32 s1 = salt_bufs[SALT_POS].salt_buf[1];
  const u32 s2 = salt_bufs[SALT_POS].salt_buf[2];
  const u32 s3 = salt_bufs[SALT_POS].salt_buf[3];
  const u32 s4 = salt_bufs[SALT_POS].salt_buf[4];
  const u32 s5 = salt_bufs[SALT_POS].salt_buf[5];
  const u32 s6 = salt_bufs[SALT_POS].salt_buf[6];
  const u32 s7 = salt_bufs[SALT_POS].salt_buf[7];
  const u32 s8 = salt_bufs[SALT_POS].salt_buf[8];
  const u32 s9 = salt_bufs[SALT_POS].salt_buf[9];

  u32 prefix[11];

  prefix[ 0] = 0x2d2d     | (s0 << 16);
  prefix[ 1] = (s0 >> 16) | (s1 << 16);
  prefix[ 2] = (s1 >> 16) | (s2 << 16);
  prefix[ 3] = (s2 >> 16) | (s3 << 16);
  prefix[ 4] = (s3 >> 16) | (s4 << 16);
  prefix[ 5] = (s4 >> 16) | (s5 << 16);
  prefix[ 6] = (s5 >> 16) | (s6 << 16);
  prefix[ 7] = (s6 >> 16) | (s7 << 16);
  prefix[ 8] = (s7 >> 16) | (s8 << 16);
  prefix[ 9] = (s8 >> 16) | (s9 << 16);
  prefix[10] = (s9 >> 16) | 0x2d2d0000;

  const u32 salt_len = 44; // 2 + 40 + 2

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * reverse: target word pre-rotated for the early-exit test inside the loop
   */

  const u32 e_rev = hc_rotl32_S (search[1], 2u);

  /**
   * loop over rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x word_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    // length of prefix + mutated word
    const u32x msg_len = word_len + salt_len;

    // make room for the prefix in front of the word, then merge it in
    switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);

    w0[0] |= prefix[ 0];
    w0[1] |= prefix[ 1];
    w0[2] |= prefix[ 2];
    w0[3] |= prefix[ 3];
    w1[0] |= prefix[ 4];
    w1[1] |= prefix[ 5];
    w1[2] |= prefix[ 6];
    w1[3] |= prefix[ 7];
    w2[0] |= prefix[ 8];
    w2[1] |= prefix[ 9];
    w2[2] |= prefix[10];

    // trailing 0x2d 0x2d, then the 0x80 padding byte
    append_0x2d_4x4_VV (w0, w1, w2, w3, msg_len);
    append_0x2d_4x4_VV (w0, w1, w2, w3, msg_len + 1);
    append_0x80_4x4_VV (w0, w1, w2, w3, msg_len + 2);

    /**
     * sha1 (80 steps, order-critical)
     */

    u32x w0_t = hc_swap32 (w0[0]);
    u32x w1_t = hc_swap32 (w0[1]);
    u32x w2_t = hc_swap32 (w0[2]);
    u32x w3_t = hc_swap32 (w0[3]);
    u32x w4_t = hc_swap32 (w1[0]);
    u32x w5_t = hc_swap32 (w1[1]);
    u32x w6_t = hc_swap32 (w1[2]);
    u32x w7_t = hc_swap32 (w1[3]);
    u32x w8_t = hc_swap32 (w2[0]);
    u32x w9_t = hc_swap32 (w2[1]);
    u32x wa_t = hc_swap32 (w2[2]);
    u32x wb_t = hc_swap32 (w2[3]);
    u32x wc_t = hc_swap32 (w3[0]);
    u32x wd_t = hc_swap32 (w3[1]);
    u32x we_t = 0;
    u32x wf_t = (msg_len + 2) * 8; // message length in bits

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);

    // early reject: skip the last four steps when no lane can match
    if (MATCHES_NONE_VS (e, e_rev)) continue;

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    COMPARE_S_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
// Empty stub: only the _s04 single-hash variant of this kernel file has an implementation.
KERNEL_FQ void m27200_s08 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
||||
|
||||
// Empty stub: only the _s04 single-hash variant of this kernel file has an implementation.
KERNEL_FQ void m27200_s16 (KERN_ATTR_RULES ())
|
||||
{
|
||||
}
|
@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp.h"
|
||||
#include "inc_rp.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
/**
 * Multi-hash kernel, rule-based attack, pure (streaming SHA-1) path.
 *
 * A SHA-1 context is pre-loaded once with 0x2d 0x2d, the salt and another
 * 0x2d 0x2d. For every rule the context is copied, the mutated word and a
 * final 0x2d 0x2d are appended, and the digest is handed to COMPARE_M_SCALAR.
 */
KERNEL_FQ void m27200_mxx (KERN_ATTR_RULES ())
{
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // 0x2d 0x2d in the two most significant bytes of word 0; the rest is zero
  const u32 sep[16] = { 0x2d2d0000 };

  /**
   * base
   */

  COPY_PW (pws[gid]);

  sha1_ctx_t ctx0;

  sha1_init (&ctx0);

  // seed the context buffer directly with the leading separator (2 bytes)
  ctx0.w0[0] = sep[0];
  ctx0.w0[1] = sep[1];
  ctx0.len   = 2;

  sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&ctx0, sep, 2);

  /**
   * loop over rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    // continue from the shared prefix state
    sha1_ctx_t ctx = ctx0;

    sha1_update_swap (&ctx, cand.i, cand.pw_len);

    sha1_update (&ctx, sep, 2);

    sha1_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Single-hash kernel, rule-based attack, pure (streaming SHA-1) path.
 *
 * A SHA-1 context is pre-loaded once with 0x2d 0x2d, the salt and another
 * 0x2d 0x2d. For every rule the context is copied, the mutated word and a
 * final 0x2d 0x2d are appended, and the digest is handed to COMPARE_S_SCALAR.
 */
KERNEL_FQ void m27200_sxx (KERN_ATTR_RULES ())
{
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // 0x2d 0x2d in the two most significant bytes of word 0; the rest is zero
  const u32 sep[16] = { 0x2d2d0000 };

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  COPY_PW (pws[gid]);

  sha1_ctx_t ctx0;

  sha1_init (&ctx0);

  // seed the context buffer directly with the leading separator (2 bytes)
  ctx0.w0[0] = sep[0];
  ctx0.w0[1] = sep[1];
  ctx0.len   = 2;

  sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&ctx0, sep, 2);

  /**
   * loop over rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    // continue from the shared prefix state
    sha1_ctx_t ctx = ctx0;

    sha1_update_swap (&ctx, cand.i, cand.pw_len);

    sha1_update (&ctx, sep, 2);

    sha1_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}
|
@ -0,0 +1,630 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
/**
 * Optimized combinator kernel for mode 27200, multi-hash compare path
 * (results are handed to COMPARE_M_SIMD).
 *
 * For every combined candidate the message
 *
 *   0x2d 0x2d | salt | 0x2d 0x2d | candidate | 0x2d 0x2d
 *
 * is assembled in one 64-byte block (w0..w3) and run through a single,
 * fully unrolled SHA-1 transform. Ten salt words (40 bytes) are read and
 * salt_len is fixed at 44 (2 + 40 + 2), so the kernel relies on the salt
 * having exactly that size.
 */
KERNEL_FQ void m27200_m04 (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  // Kept from the original; not referenced directly anywhere in this body.
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Base word of this work-item: only the first 8 words (32 bytes) are loaded.

  const u32 pw_buf0[4] =
  {
    pws[gid].i[0],
    pws[gid].i[1],
    pws[gid].i[2],
    pws[gid].i[3]
  };

  const u32 pw_buf1[4] =
  {
    pws[gid].i[4],
    pws[gid].i[5],
    pws[gid].i[6],
    pws[gid].i[7]
  };

  const u32 pw_l_len = pws[gid].pw_len & 63;

  /**
   * salt
   */

  // The ten salt words, fetched once each.

  const u32 salt_w[10] =
  {
    salt_bufs[SALT_POS].salt_buf[0],
    salt_bufs[SALT_POS].salt_buf[1],
    salt_bufs[SALT_POS].salt_buf[2],
    salt_bufs[SALT_POS].salt_buf[3],
    salt_bufs[SALT_POS].salt_buf[4],
    salt_bufs[SALT_POS].salt_buf[5],
    salt_bufs[SALT_POS].salt_buf[6],
    salt_bufs[SALT_POS].salt_buf[7],
    salt_bufs[SALT_POS].salt_buf[8],
    salt_bufs[SALT_POS].salt_buf[9]
  };

  // Prefix image: 0x2d2d in the low half of word 0, then the salt shifted
  // up by 16 bits (two bytes) across the following words, then 0x2d2d again
  // in the high half of word 10.

  u32 salt_buf0[4];
  u32 salt_buf1[4];
  u32 salt_buf2[4];
  u32 salt_buf3[4] = { 0, 0, 0, 0 };

  salt_buf0[0] = 0x2d2d              | (salt_w[0] << 16);
  salt_buf0[1] = (salt_w[0] >> 16) | (salt_w[1] << 16);
  salt_buf0[2] = (salt_w[1] >> 16) | (salt_w[2] << 16);
  salt_buf0[3] = (salt_w[2] >> 16) | (salt_w[3] << 16);
  salt_buf1[0] = (salt_w[3] >> 16) | (salt_w[4] << 16);
  salt_buf1[1] = (salt_w[4] >> 16) | (salt_w[5] << 16);
  salt_buf1[2] = (salt_w[5] >> 16) | (salt_w[6] << 16);
  salt_buf1[3] = (salt_w[6] >> 16) | (salt_w[7] << 16);
  salt_buf2[0] = (salt_w[7] >> 16) | (salt_w[8] << 16);
  salt_buf2[1] = (salt_w[8] >> 16) | (salt_w[9] << 16);
  salt_buf2[2] = (salt_w[9] >> 16) | 0x2d2d0000;
  salt_buf2[3] = 0;

  const u32 salt_len = 44; // 2 + 40 + 2 bytes of prefix

  /**
   * digest
   */

  // NOTE(review): `search` is not referenced directly in this body; whether
  // COMPARE_M_SIMD picks it up by name cannot be seen from here - confirm
  // before removing it.

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;

    const u32x pw_len = (pw_l_len + pw_r_len) & 63;

    /**
     * concat password candidate
     */

    // base word (same for every iteration)

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    // combinator word(s) for this iteration

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // Whichever word goes second is moved behind the first one, so that the
    // two buffers can simply be OR-ed together.

    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * prepend salt
     */

    // make room for the 44 prefix bytes, then OR the prefix image in

    switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);

    const u32x pw_salt_len = pw_len + salt_len;

    w0[0] |= salt_buf0[0];
    w0[1] |= salt_buf0[1];
    w0[2] |= salt_buf0[2];
    w0[3] |= salt_buf0[3];
    w1[0] |= salt_buf1[0];
    w1[1] |= salt_buf1[1];
    w1[2] |= salt_buf1[2];
    w1[3] |= salt_buf1[3];
    w2[0] |= salt_buf2[0];
    w2[1] |= salt_buf2[1];
    w2[2] |= salt_buf2[2];
    w2[3] |= salt_buf2[3];
    w3[0] |= salt_buf3[0];
    w3[1] |= salt_buf3[1];
    w3[2] |= salt_buf3[2];
    w3[3] |= salt_buf3[3];

    // trailing 0x2d 0x2d followed by the 0x80 padding marker

    append_0x2d_4x4_VV (w0, w1, w2, w3, pw_salt_len);
    append_0x2d_4x4_VV (w0, w1, w2, w3, pw_salt_len + 1);
    append_0x80_4x4_VV (w0, w1, w2, w3, pw_salt_len + 2);

    /**
     * sha1
     */

    // Words are byte-swapped for the transform; the last two words are not
    // taken from the buffer: we_t is zero and wf_t holds the message length
    // in bits.

    u32x w0_t = hc_swap32 (w0[0]);
    u32x w1_t = hc_swap32 (w0[1]);
    u32x w2_t = hc_swap32 (w0[2]);
    u32x w3_t = hc_swap32 (w0[3]);
    u32x w4_t = hc_swap32 (w1[0]);
    u32x w5_t = hc_swap32 (w1[1]);
    u32x w6_t = hc_swap32 (w1[2]);
    u32x w7_t = hc_swap32 (w1[3]);
    u32x w8_t = hc_swap32 (w2[0]);
    u32x w9_t = hc_swap32 (w2[1]);
    u32x wa_t = hc_swap32 (w2[2]);
    u32x wb_t = hc_swap32 (w2[3]);
    u32x wc_t = hc_swap32 (w3[0]);
    u32x wd_t = hc_swap32 (w3[1]);
    u32x we_t = 0;
    u32x wf_t = (pw_salt_len + 2) * 8;

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    // steps 0..19

    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    // steps 20..39

    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    // steps 40..59

    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    // steps 60..79

    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    // The working variables are compared as they are; no initial state is
    // added back in this body.

    COMPARE_M_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
// Kernel entry point with an empty body: it contains no statements, so
// launching it performs no work.
// NOTE(review): presumably a placeholder counterpart of m27200_m04 that this
// mode does not implement - confirm against the host-side kernel selection.
KERNEL_FQ void m27200_m08 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
// Kernel entry point with an empty body: it contains no statements, so
// launching it performs no work.
// NOTE(review): presumably a placeholder counterpart of m27200_m04 that this
// mode does not implement - confirm against the host-side kernel selection.
KERNEL_FQ void m27200_m16 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Optimized combinator kernel for mode 27200, single-hash compare path
 * (results are handed to COMPARE_S_SIMD).
 *
 * For every combined candidate the message
 *
 *   0x2d 0x2d | salt | 0x2d 0x2d | candidate | 0x2d 0x2d
 *
 * is assembled in one 64-byte block (w0..w3) and run through a single,
 * unrolled SHA-1 transform. Because only one target digest is searched, the
 * transform is abandoned four steps before the end when no vector lane can
 * match. Ten salt words (40 bytes) are read and salt_len is fixed at 44
 * (2 + 40 + 2), so the kernel relies on the salt having exactly that size.
 */
KERNEL_FQ void m27200_s04 (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  // Kept from the original; not referenced directly anywhere in this body.
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Base word of this work-item: only the first 8 words (32 bytes) are loaded.

  const u32 pw_buf0[4] =
  {
    pws[gid].i[0],
    pws[gid].i[1],
    pws[gid].i[2],
    pws[gid].i[3]
  };

  const u32 pw_buf1[4] =
  {
    pws[gid].i[4],
    pws[gid].i[5],
    pws[gid].i[6],
    pws[gid].i[7]
  };

  const u32 pw_l_len = pws[gid].pw_len & 63;

  /**
   * salt
   */

  // The ten salt words, fetched once each.

  const u32 salt_w[10] =
  {
    salt_bufs[SALT_POS].salt_buf[0],
    salt_bufs[SALT_POS].salt_buf[1],
    salt_bufs[SALT_POS].salt_buf[2],
    salt_bufs[SALT_POS].salt_buf[3],
    salt_bufs[SALT_POS].salt_buf[4],
    salt_bufs[SALT_POS].salt_buf[5],
    salt_bufs[SALT_POS].salt_buf[6],
    salt_bufs[SALT_POS].salt_buf[7],
    salt_bufs[SALT_POS].salt_buf[8],
    salt_bufs[SALT_POS].salt_buf[9]
  };

  // Prefix image: 0x2d2d in the low half of word 0, then the salt shifted
  // up by 16 bits (two bytes) across the following words, then 0x2d2d again
  // in the high half of word 10.

  u32 salt_buf0[4];
  u32 salt_buf1[4];
  u32 salt_buf2[4];
  u32 salt_buf3[4] = { 0, 0, 0, 0 };

  salt_buf0[0] = 0x2d2d              | (salt_w[0] << 16);
  salt_buf0[1] = (salt_w[0] >> 16) | (salt_w[1] << 16);
  salt_buf0[2] = (salt_w[1] >> 16) | (salt_w[2] << 16);
  salt_buf0[3] = (salt_w[2] >> 16) | (salt_w[3] << 16);
  salt_buf1[0] = (salt_w[3] >> 16) | (salt_w[4] << 16);
  salt_buf1[1] = (salt_w[4] >> 16) | (salt_w[5] << 16);
  salt_buf1[2] = (salt_w[5] >> 16) | (salt_w[6] << 16);
  salt_buf1[3] = (salt_w[6] >> 16) | (salt_w[7] << 16);
  salt_buf2[0] = (salt_w[7] >> 16) | (salt_w[8] << 16);
  salt_buf2[1] = (salt_w[8] >> 16) | (salt_w[9] << 16);
  salt_buf2[2] = (salt_w[9] >> 16) | 0x2d2d0000;
  salt_buf2[3] = 0;

  const u32 salt_len = 44; // 2 + 40 + 2 bytes of prefix

  /**
   * digest
   */

  // Target digest words. search[1] feeds the early-exit value below.
  // NOTE(review): COMPARE_S_SIMD presumably reads `search` by name as well,
  // so the identifier must not be renamed - confirm.

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * reverse
   */

  // Value `e` must hold four steps before the end for search[1] to match.
  // Assuming SHA1_STEP is the standard SHA-1 step, the remaining four steps
  // only rotate `e`, which the rotate-left by 2 undoes on the target side.

  const u32 e_rev = hc_rotl32_S (search[1], 2u);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;

    const u32x pw_len = (pw_l_len + pw_r_len) & 63;

    /**
     * concat password candidate
     */

    // base word (same for every iteration)

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    // combinator word(s) for this iteration

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // Whichever word goes second is moved behind the first one, so that the
    // two buffers can simply be OR-ed together.

    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * prepend salt
     */

    // make room for the 44 prefix bytes, then OR the prefix image in

    switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);

    const u32x pw_salt_len = pw_len + salt_len;

    w0[0] |= salt_buf0[0];
    w0[1] |= salt_buf0[1];
    w0[2] |= salt_buf0[2];
    w0[3] |= salt_buf0[3];
    w1[0] |= salt_buf1[0];
    w1[1] |= salt_buf1[1];
    w1[2] |= salt_buf1[2];
    w1[3] |= salt_buf1[3];
    w2[0] |= salt_buf2[0];
    w2[1] |= salt_buf2[1];
    w2[2] |= salt_buf2[2];
    w2[3] |= salt_buf2[3];
    w3[0] |= salt_buf3[0];
    w3[1] |= salt_buf3[1];
    w3[2] |= salt_buf3[2];
    w3[3] |= salt_buf3[3];

    // trailing 0x2d 0x2d followed by the 0x80 padding marker

    append_0x2d_4x4_VV (w0, w1, w2, w3, pw_salt_len);
    append_0x2d_4x4_VV (w0, w1, w2, w3, pw_salt_len + 1);
    append_0x80_4x4_VV (w0, w1, w2, w3, pw_salt_len + 2);

    /**
     * sha1
     */

    // Words are byte-swapped for the transform; the last two words are not
    // taken from the buffer: we_t is zero and wf_t holds the message length
    // in bits.

    u32x w0_t = hc_swap32 (w0[0]);
    u32x w1_t = hc_swap32 (w0[1]);
    u32x w2_t = hc_swap32 (w0[2]);
    u32x w3_t = hc_swap32 (w0[3]);
    u32x w4_t = hc_swap32 (w1[0]);
    u32x w5_t = hc_swap32 (w1[1]);
    u32x w6_t = hc_swap32 (w1[2]);
    u32x w7_t = hc_swap32 (w1[3]);
    u32x w8_t = hc_swap32 (w2[0]);
    u32x w9_t = hc_swap32 (w2[1]);
    u32x wa_t = hc_swap32 (w2[2]);
    u32x wb_t = hc_swap32 (w2[3]);
    u32x wc_t = hc_swap32 (w3[0]);
    u32x wd_t = hc_swap32 (w3[1]);
    u32x we_t = 0;
    u32x wf_t = (pw_salt_len + 2) * 8;

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    // steps 0..19

    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    // steps 20..39

    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    // steps 40..59

    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    // steps 60..75

    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);

    // early exit: skip the last four steps when no lane's `e` equals e_rev

    if (MATCHES_NONE_VS (e, e_rev)) continue;

    // steps 76..79

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    // The working variables are compared as they are; no initial state is
    // added back in this body.

    COMPARE_S_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
// Kernel entry point with an empty body: it contains no statements, so
// launching it performs no work.
// NOTE(review): presumably a placeholder counterpart of m27200_s04 that this
// mode does not implement - confirm against the host-side kernel selection.
KERNEL_FQ void m27200_s08 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
||||
|
||||
// Kernel entry point with an empty body: it contains no statements, so
// launching it performs no work.
// NOTE(review): presumably a placeholder counterpart of m27200_s04 that this
// mode does not implement - confirm against the host-side kernel selection.
KERNEL_FQ void m27200_s16 (KERN_ATTR_BASIC ())
|
||||
{
|
||||
}
|
@ -0,0 +1,131 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_scalar.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
/**
 * Pure (length-agnostic) combinator kernel for mode 27200, multi-hash
 * compare path (results are handed to COMPARE_M_SCALAR).
 *
 * The SHA-1 input is built incrementally:
 *
 *   0x2d 0x2d | salt | 0x2d 0x2d | pws[gid] word | combs_buf[il_pos] word | 0x2d 0x2d
 *
 * Everything up to and including the pws[gid] word is hashed once into a
 * base context; each loop iteration copies that context and appends only the
 * combinator word and the trailing separator.
 */
KERNEL_FQ void m27200_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  // `lid` is kept from the original; it is not referenced directly below.
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Two 0x2d bytes in the top half of word 0; the remaining 15 words are
  // zero-filled by the partial initializer.
  const u32 separator[16] = { 0x2d2d0000 };

  /**
   * base
   */

  sha1_ctx_t base_ctx;

  sha1_init (&base_ctx);

  // Seed the context with the leading separator directly instead of going
  // through sha1_update: two bytes are already buffered.
  base_ctx.w0[0] = separator[0];
  base_ctx.w0[1] = separator[1];
  base_ctx.len   = 2;

  sha1_update_global_swap (&base_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&base_ctx, separator, 2);

  sha1_update_global_swap (&base_ctx, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    // start from the shared prefix state

    sha1_ctx_t cand_ctx = base_ctx;

    sha1_update_global_swap (&cand_ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_update (&cand_ctx, separator, 2);

    sha1_final (&cand_ctx);

    // digest words in the order the compare macro expects

    const u32 r0 = cand_ctx.h[DGST_R0];
    const u32 r1 = cand_ctx.h[DGST_R1];
    const u32 r2 = cand_ctx.h[DGST_R2];
    const u32 r3 = cand_ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Pure (length-agnostic) combinator kernel for mode 27200, single-hash
 * compare path (results are handed to COMPARE_S_SCALAR).
 *
 * The SHA-1 input is built incrementally:
 *
 *   0x2d 0x2d | salt | 0x2d 0x2d | pws[gid] word | combs_buf[il_pos] word | 0x2d 0x2d
 *
 * Everything up to and including the pws[gid] word is hashed once into a
 * base context; each loop iteration copies that context and appends only the
 * combinator word and the trailing separator.
 */
KERNEL_FQ void m27200_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  // `lid` is kept from the original; it is not referenced directly below.
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // Two 0x2d bytes in the top half of word 0; the remaining 15 words are
  // zero-filled by the partial initializer.
  const u32 separator[16] = { 0x2d2d0000 };

  /**
   * digest
   */

  // Target digest words.
  // NOTE(review): `search` is not referenced directly in this body;
  // COMPARE_S_SCALAR presumably reads it by name, so the identifier is kept
  // unchanged - confirm.

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  sha1_ctx_t base_ctx;

  sha1_init (&base_ctx);

  // Seed the context with the leading separator directly instead of going
  // through sha1_update: two bytes are already buffered.
  base_ctx.w0[0] = separator[0];
  base_ctx.w0[1] = separator[1];
  base_ctx.len   = 2;

  sha1_update_global_swap (&base_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&base_ctx, separator, 2);

  sha1_update_global_swap (&base_ctx, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
    // start from the shared prefix state

    sha1_ctx_t cand_ctx = base_ctx;

    sha1_update_global_swap (&cand_ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    sha1_update (&cand_ctx, separator, 2);

    sha1_final (&cand_ctx);

    // digest words in the order the compare macro expects

    const u32 r0 = cand_ctx.h[DGST_R0];
    const u32 r1 = cand_ctx.h[DGST_R1];
    const u32 r2 = cand_ctx.h[DGST_R2];
    const u32 r3 = cand_ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}
|
@ -0,0 +1,828 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
/**
 * Writes the 4-byte value src_r0 into the 64-byte buffer formed by w0..w3
 * (four words each) starting at byte position `offset`.
 *
 * Bytes are counted most-significant-first inside each word. The part of
 * src_r0 that fits into the word containing `offset` is OR-ed into it; the
 * remainder OVERWRITES the following word (for an aligned offset that means
 * the following word is set to 0). For the last word (index 15) there is no
 * following word and the remainder is dropped. Offsets of 64 and above leave
 * the buffer untouched.
 */
DECLSPEC void append_4 (const u32 offset, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 src_r0)
{
  // number of bits src_r0 has to move toward the low end of its first word

  const u32 shift = (offset & 3) * 8;

  const u32 hi = src_r0 >> shift;

  // a shift by 32 is not defined, so the aligned case is spelled out explicitly

  const u32 lo = (shift == 0) ? 0 : (src_r0 << (32 - shift));

  switch (offset / 4)
  {
    case  0: w0[0] |= hi; w0[1] = lo; break;
    case  1: w0[1] |= hi; w0[2] = lo; break;
    case  2: w0[2] |= hi; w0[3] = lo; break;
    case  3: w0[3] |= hi; w1[0] = lo; break;
    case  4: w1[0] |= hi; w1[1] = lo; break;
    case  5: w1[1] |= hi; w1[2] = lo; break;
    case  6: w1[2] |= hi; w1[3] = lo; break;
    case  7: w1[3] |= hi; w2[0] = lo; break;
    case  8: w2[0] |= hi; w2[1] = lo; break;
    case  9: w2[1] |= hi; w2[2] = lo; break;
    case 10: w2[2] |= hi; w2[3] = lo; break;
    case 11: w2[3] |= hi; w3[0] = lo; break;
    case 12: w3[0] |= hi; w3[1] = lo; break;
    case 13: w3[1] |= hi; w3[2] = lo; break;
    case 14: w3[2] |= hi; w3[3] = lo; break;
    case 15: w3[3] |= hi;             break;
  }
}
|
||||
|
||||
/**
 * Shared worker behind the m27200_m04 / m27200_m08 / m27200_m16 kernels.
 *
 * Builds one 64-byte SHA-1 block laid out as
 *   "--" . 40 salt bytes . "--" . candidate . "--" . 0x80 . zero fill . bit length
 * runs all 80 SHA-1 steps on it and passes the resulting state words to
 * COMPARE_M_SIMD.
 *
 * w0..w3  candidate words supplied by the calling kernel; modified in place
 *         (the "--" / 0x80 trailer is appended at byte offset pw_len)
 * pw_len  candidate length in bytes
 *
 * NOTE(review): exactly ten salt words (salt_buf[0..9]) are read and the
 * prefix length is fixed at 44; salt_bufs[SALT_POS].salt_len is not consulted
 * here - presumably the host side guarantees a 40-byte salt; confirm.
 */
DECLSPEC void m27200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  // 0x2d 0x2d ("--") followed by 0x80, the SHA-1 end-of-message marker byte
  const u32 dash_stop = 0x2d2d8000;

  /**
   * salt
   */

  // the prefix starts with "--" in the two most significant bytes of the first word
  u32 salt_buf0[4] = { 0x2d2d0000, 0, 0, 0 };
  u32 salt_buf1[4];
  u32 salt_buf2[4];
  u32 salt_buf3[4] = { 0 };

  // every salt word is split across two output words because the leading "--"
  // displaces the salt by 16 bits; hc_swap32_S reverses the byte order of each
  // combined word
  salt_buf0[0] |= hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0] << 16);
  salt_buf0[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0] >> 16 | salt_bufs[SALT_POS].salt_buf[ 1] << 16);
  salt_buf0[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1] >> 16 | salt_bufs[SALT_POS].salt_buf[ 2] << 16);
  salt_buf0[3]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2] >> 16 | salt_bufs[SALT_POS].salt_buf[ 3] << 16);
  salt_buf1[0]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3] >> 16 | salt_bufs[SALT_POS].salt_buf[ 4] << 16);
  salt_buf1[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4] >> 16 | salt_bufs[SALT_POS].salt_buf[ 5] << 16);
  salt_buf1[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5] >> 16 | salt_bufs[SALT_POS].salt_buf[ 6] << 16);
  salt_buf1[3]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6] >> 16 | salt_bufs[SALT_POS].salt_buf[ 7] << 16);
  salt_buf2[0]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7] >> 16 | salt_bufs[SALT_POS].salt_buf[ 8] << 16);
  salt_buf2[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8] >> 16 | salt_bufs[SALT_POS].salt_buf[ 9] << 16);
  // last two salt bytes followed by the "--" that separates salt and candidate
  salt_buf2[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9] >> 16 | 0x2d2d0000);
  salt_buf2[3]  = 0;

  // 2 ("--") + 40 (salt) + 2 ("--")
  const u32 salt_len = 44;

  // append "--" and the 0x80 marker directly behind the candidate bytes
  append_4 (pw_len, w0, w1, w2, w3, dash_stop);

  // total message length in bytes: prefix + candidate + trailing "--"
  const u32 pw_salt_len = pw_len + 2 + salt_len;

  /**
   * loop
   */

  // first candidate word as loaded by the caller; OR-ed with a per-iteration word below
  const u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    // presumably gathers VECT_SIZE entries of bfs_buf starting at il_pos - confirm against inc_simd
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    u32x t0[4];
    u32x t1[4];
    u32x t2[4];
    u32x t3[4];

    t0[0] = w0lr;
    t0[1] = w0[1];
    t0[2] = w0[2];
    t0[3] = w0[3];
    t1[0] = w1[0];
    t1[1] = w1[1];
    t1[2] = w1[2];
    t1[3] = w1[3];
    t2[0] = w2[0];
    t2[1] = w2[1];
    t2[2] = w2[2];
    t2[3] = w2[3];
    t3[0] = w3[0];
    t3[1] = w3[1];
    t3[2] = w3[2];
    t3[3] = w3[3];

    // presumably moves the candidate salt_len bytes toward the end of the block so
    // the prefix can be OR-ed in front of it - confirm against the helper's definition
    switch_buffer_by_offset_be (t0, t1, t2, t3, salt_len);

    t0[0] |= salt_buf0[0];
    t0[1] |= salt_buf0[1];
    t0[2] |= salt_buf0[2];
    t0[3] |= salt_buf0[3];
    t1[0] |= salt_buf1[0];
    t1[1] |= salt_buf1[1];
    t1[2] |= salt_buf1[2];
    t1[3] |= salt_buf1[3];
    t2[0] |= salt_buf2[0];
    t2[1] |= salt_buf2[1];
    t2[2] |= salt_buf2[2];
    t2[3] |= salt_buf2[3];
    // salt_buf3 is all zero, so these two ORs leave t3[0] and t3[1] unchanged
    t3[0] |= salt_buf3[0];
    t3[1] |= salt_buf3[1];
    // NOTE(review): the last two words are overwritten unconditionally, so shifted
    // candidate data must not reach them - presumably the host side limits the
    // candidate length accordingly; confirm
    t3[2] = 0;
    t3[3] = (pw_salt_len) * 8;

    /**
     * sha1
     */

    u32x w0_t = t0[0];
    u32x w1_t = t0[1];
    u32x w2_t = t0[2];
    u32x w3_t = t0[3];
    u32x w4_t = t1[0];
    u32x w5_t = t1[1];
    u32x w6_t = t1[2];
    u32x w7_t = t1[3];
    u32x w8_t = t2[0];
    u32x w9_t = t2[1];
    u32x wa_t = t2[2];
    u32x wb_t = t2[3];
    u32x wc_t = t3[0];
    u32x wd_t = t3[1];
    // words 14 and 15 are set from the same constants as t3[2] / t3[3] above
    u32x we_t = 0;
    u32x wf_t = (pw_salt_len) * 8;

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    // steps 0..19; K is re-defined before each group of 20 steps and is
    // presumably read inside SHA1_STEP - confirm against inc_hash_sha1
    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    // from here on each message word is recomputed in place from four earlier
    // words (XOR, rotate left by one) right before it is consumed
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    // steps 20..39
    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    // steps 40..59
    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    // steps 60..79
    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    // NOTE(review): the state is compared without adding the initial values back
    // in; presumably the host side stores digests with that addition already
    // undone and in (d, e, c, b) word order - confirm against the module
    COMPARE_M_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
/**
 * Shared worker behind the m27200_s04 / m27200_s08 / m27200_s16 kernels.
 *
 * Builds one 64-byte SHA-1 block laid out as
 *   "--" . 40 salt bytes . "--" . candidate . "--" . 0x80 . zero fill . bit length
 * and runs the SHA-1 steps on it. After step 76 the value in `e` is compared
 * with a pre-rotated copy of search[1]; vectors with no matching lane skip the
 * last four steps. Surviving vectors are finished and passed to COMPARE_S_SIMD.
 *
 * w0..w3  candidate words supplied by the calling kernel; modified in place
 *         (the "--" / 0x80 trailer is appended at byte offset pw_len)
 * pw_len  candidate length in bytes
 *
 * NOTE(review): exactly ten salt words (salt_buf[0..9]) are read and the
 * prefix length is fixed at 44; salt_bufs[SALT_POS].salt_len is not consulted
 * here - presumably the host side guarantees a 40-byte salt; confirm.
 */
DECLSPEC void m27200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  // 0x2d 0x2d ("--") followed by 0x80, the SHA-1 end-of-message marker byte
  const u32 dash_stop = 0x2d2d8000;

  /**
   * digest
   */

  // besides feeding e_rev below, this array is presumably read by COMPARE_S_SIMD - confirm
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * reverse
   */

  // target for the early-reject test inside the loop; presumably the rotation by 2
  // undoes the rotation the remaining steps apply to this word - confirm against SHA1_STEP
  const u32 e_rev = hc_rotl32_S (search[1], 2u);

  /**
   * salt
   */

  // the prefix starts with "--" in the two most significant bytes of the first word
  u32 salt_buf0[4] = { 0x2d2d0000, 0, 0, 0 };
  u32 salt_buf1[4];
  u32 salt_buf2[4];
  u32 salt_buf3[4] = { 0 };

  // every salt word is split across two output words because the leading "--"
  // displaces the salt by 16 bits; hc_swap32_S reverses the byte order of each
  // combined word
  salt_buf0[0] |= hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0] << 16);
  salt_buf0[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0] >> 16 | salt_bufs[SALT_POS].salt_buf[ 1] << 16);
  salt_buf0[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1] >> 16 | salt_bufs[SALT_POS].salt_buf[ 2] << 16);
  salt_buf0[3]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2] >> 16 | salt_bufs[SALT_POS].salt_buf[ 3] << 16);
  salt_buf1[0]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3] >> 16 | salt_bufs[SALT_POS].salt_buf[ 4] << 16);
  salt_buf1[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4] >> 16 | salt_bufs[SALT_POS].salt_buf[ 5] << 16);
  salt_buf1[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5] >> 16 | salt_bufs[SALT_POS].salt_buf[ 6] << 16);
  salt_buf1[3]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6] >> 16 | salt_bufs[SALT_POS].salt_buf[ 7] << 16);
  salt_buf2[0]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7] >> 16 | salt_bufs[SALT_POS].salt_buf[ 8] << 16);
  salt_buf2[1]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8] >> 16 | salt_bufs[SALT_POS].salt_buf[ 9] << 16);
  // last two salt bytes followed by the "--" that separates salt and candidate
  salt_buf2[2]  = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9] >> 16 | 0x2d2d0000);
  salt_buf2[3]  = 0;

  // 2 ("--") + 40 (salt) + 2 ("--")
  const u32 salt_len = 44;

  // append "--" and the 0x80 marker directly behind the candidate bytes
  append_4 (pw_len, w0, w1, w2, w3, dash_stop);

  // total message length in bytes: prefix + candidate + trailing "--"
  const u32 pw_salt_len = pw_len + 2 + salt_len;

  /**
   * loop
   */

  // first candidate word as loaded by the caller; OR-ed with a per-iteration word below
  const u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    // presumably gathers VECT_SIZE entries of bfs_buf starting at il_pos - confirm against inc_simd
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    u32x t0[4];
    u32x t1[4];
    u32x t2[4];
    u32x t3[4];

    t0[0] = w0lr;
    t0[1] = w0[1];
    t0[2] = w0[2];
    t0[3] = w0[3];
    t1[0] = w1[0];
    t1[1] = w1[1];
    t1[2] = w1[2];
    t1[3] = w1[3];
    t2[0] = w2[0];
    t2[1] = w2[1];
    t2[2] = w2[2];
    t2[3] = w2[3];
    t3[0] = w3[0];
    t3[1] = w3[1];
    t3[2] = w3[2];
    t3[3] = w3[3];

    // presumably moves the candidate salt_len bytes toward the end of the block so
    // the prefix can be OR-ed in front of it - confirm against the helper's definition
    switch_buffer_by_offset_be (t0, t1, t2, t3, salt_len);

    t0[0] |= salt_buf0[0];
    t0[1] |= salt_buf0[1];
    t0[2] |= salt_buf0[2];
    t0[3] |= salt_buf0[3];
    t1[0] |= salt_buf1[0];
    t1[1] |= salt_buf1[1];
    t1[2] |= salt_buf1[2];
    t1[3] |= salt_buf1[3];
    t2[0] |= salt_buf2[0];
    t2[1] |= salt_buf2[1];
    t2[2] |= salt_buf2[2];
    t2[3] |= salt_buf2[3];
    // salt_buf3 is all zero, so these two ORs leave t3[0] and t3[1] unchanged
    t3[0] |= salt_buf3[0];
    t3[1] |= salt_buf3[1];
    // NOTE(review): the last two words are overwritten unconditionally, so shifted
    // candidate data must not reach them - presumably the host side limits the
    // candidate length accordingly; confirm
    t3[2] = 0;
    t3[3] = pw_salt_len * 8;

    /**
     * sha1
     */

    u32x w0_t = t0[0];
    u32x w1_t = t0[1];
    u32x w2_t = t0[2];
    u32x w3_t = t0[3];
    u32x w4_t = t1[0];
    u32x w5_t = t1[1];
    u32x w6_t = t1[2];
    u32x w7_t = t1[3];
    u32x w8_t = t2[0];
    u32x w9_t = t2[1];
    u32x wa_t = t2[2];
    u32x wb_t = t2[3];
    u32x wc_t = t3[0];
    u32x wd_t = t3[1];
    // words 14 and 15 are set from the same constants as t3[2] / t3[3] above
    u32x we_t = 0;
    u32x wf_t = pw_salt_len * 8;

    u32x a = SHA1M_A;
    u32x b = SHA1M_B;
    u32x c = SHA1M_C;
    u32x d = SHA1M_D;
    u32x e = SHA1M_E;

    // steps 0..19; K is re-defined before each group of 20 steps and is
    // presumably read inside SHA1_STEP - confirm against inc_hash_sha1
    #undef K
    #define K SHA1C00

    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
    SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
    SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
    SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
    SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
    SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
    // from here on each message word is recomputed in place from four earlier
    // words (XOR, rotate left by one) right before it is consumed
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);

    // steps 20..39
    #undef K
    #define K SHA1C01

    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);

    // steps 40..59
    #undef K
    #define K SHA1C02

    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);

    // steps 60..75
    #undef K
    #define K SHA1C03

    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
    w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
    w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
    w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
    w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
    w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
    w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
    w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
    w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
    w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
    w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
    wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
    wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);

    // early reject: skip the last four steps when no lane of e equals the
    // pre-rotated target word
    if (MATCHES_NONE_VS (e, e_rev)) continue;

    // steps 76..79, still under K = SHA1C03
    wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
    wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
    we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
    wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);

    // NOTE(review): the state is compared without adding the initial values back
    // in; presumably the host side stores digests with that addition already
    // undone and in (d, e, c, b) word order - confirm against the module
    COMPARE_S_SIMD (d, e, c, b);
  }
}
|
||||
|
||||
/**
 * Kernel entry point that loads only the first four candidate words
 * (16 bytes) of pws[gid], leaves the other twelve words zero and forwards
 * everything to m27200m.
 */
KERNEL_FQ void m27200_m04 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
||||
|
||||
/**
 * Kernel entry point that loads the first eight candidate words (32 bytes)
 * of pws[gid], leaves the upper eight words zero and forwards everything to
 * m27200m.
 */
KERNEL_FQ void m27200_m08 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
||||
|
||||
/**
 * Kernel entry point that loads candidate words 0..13 (56 bytes) of
 * pws[gid], keeps the last two words zero and forwards everything to
 * m27200m.
 */
KERNEL_FQ void m27200_m16 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };

  // words 14 and 15 are never taken from the candidate

  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
||||
|
||||
/**
 * Kernel entry point that loads only the first four candidate words
 * (16 bytes) of pws[gid], leaves the other twelve words zero and forwards
 * everything to m27200s.
 */
KERNEL_FQ void m27200_s04 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
||||
|
||||
/**
 * Kernel entry point that loads the first eight candidate words (32 bytes)
 * of pws[gid], leaves the upper eight words zero and forwards everything to
 * m27200s.
 */
KERNEL_FQ void m27200_s08 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
||||
|
||||
/**
 * Kernel entry point that loads candidate words 0..13 (56 bytes) of
 * pws[gid], keeps the last two words zero and forwards everything to
 * m27200s.
 */
KERNEL_FQ void m27200_s16 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };

  // words 14 and 15 are never taken from the candidate

  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  // masked so the length always stays inside the 64-byte buffer (0..63)

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * main
   */

  m27200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
}
|
@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
KERNEL_FQ void m27200_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * Multi-hash kernel. For every candidate it computes
   *
   *   SHA1 (dash . salt . dash . password . dash)
   *
   * where "dash" is the 2-byte value held in dash_scalar / dash_vector,
   * and passes four digest words to COMPARE_M_SIMD.
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // 0x2d2d0000: two 0x2d ('-') bytes in the upper half of the first word
  const u32x dash_vector[16] = { 0x2d2d0000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  const u32  dash_scalar[16] = { 0x2d2d0000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  /**
   * base: copy the candidate, one 32-bit word per (started) 4 bytes of length
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  u32 word = 0;

  while ((word * 4) < pw_len)
  {
    w[word] = pws[gid].i[word];

    word++;
  }

  /**
   * prefix that does not depend on the candidate: dash . salt . dash
   */

  sha1_ctx_t ctx0;

  sha1_init (&ctx0);

  // place the leading dash directly into the context buffer
  ctx0.w0[0] = dash_scalar[0];
  ctx0.w0[1] = dash_scalar[1];

  ctx0.len = 2;

  sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&ctx0, dash_scalar, 2);

  /**
   * loop: w[0] is rebuilt each round from the fixed left part and the
   * per-round right part read from words_buf_r
   */

  const u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0 = w0l | words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0;

    // continue from the precomputed prefix state
    sha1_ctx_vector_t ctx;

    sha1_init_vector_from_scalar (&ctx, &ctx0);

    sha1_update_vector (&ctx, w, pw_len);
    sha1_update_vector (&ctx, dash_vector, 2);

    sha1_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
KERNEL_FQ void m27200_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * Single-hash kernel. For every candidate it computes
   *
   *   SHA1 (dash . salt . dash . password . dash)
   *
   * where "dash" is the 2-byte value held in dash_scalar / dash_vector,
   * and passes four digest words to COMPARE_S_SIMD.
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // 0x2d2d0000: two 0x2d ('-') bytes in the upper half of the first word
  const u32x dash_vector[16] = { 0x2d2d0000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  const u32  dash_scalar[16] = { 0x2d2d0000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  /**
   * digest: the four target words of the hash at DIGESTS_OFFSET
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * base: copy the candidate, one 32-bit word per (started) 4 bytes of length
   */

  const u32 pw_len = pws[gid].pw_len;

  u32x w[64] = { 0 };

  u32 word = 0;

  while ((word * 4) < pw_len)
  {
    w[word] = pws[gid].i[word];

    word++;
  }

  /**
   * prefix that does not depend on the candidate: dash . salt . dash
   */

  sha1_ctx_t ctx0;

  sha1_init (&ctx0);

  // place the leading dash directly into the context buffer
  ctx0.w0[0] = dash_scalar[0];
  ctx0.w0[1] = dash_scalar[1];

  ctx0.len = 2;

  sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len);

  sha1_update (&ctx0, dash_scalar, 2);

  /**
   * loop: w[0] is rebuilt each round from the fixed left part and the
   * per-round right part read from words_buf_r
   */

  const u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0 = w0l | words_buf_r[il_pos / VECT_SIZE];

    w[0] = w0;

    // continue from the precomputed prefix state
    sha1_ctx_vector_t ctx;

    sha1_init_vector_from_scalar (&ctx, &ctx0);

    sha1_update_vector (&ctx, w, pw_len);
    sha1_update_vector (&ctx, dash_vector, 2);

    sha1_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
@ -0,0 +1,495 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
//#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha512.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
|
||||
#define COMPARE_M "inc_comp_multi.cl"
|
||||
|
||||
#define SNMPV3_SALT_MAX 1500
|
||||
#define SNMPV3_ENGINEID_MAX 34
|
||||
#define SNMPV3_MSG_AUTH_PARAMS_MAX 48
|
||||
#define SNMPV3_ROUNDS 1048576
|
||||
#define SNMPV3_MAX_PW_LENGTH 128
|
||||
|
||||
#define SNMPV3_TMP_ELEMS 8192 // 8192 = (256 (max pw length) * SNMPV3_MAX_PW_LENGTH) / sizeof (u32)
|
||||
#define SNMPV3_HASH_ELEMS 8
|
||||
|
||||
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of SNMPV3_MAX_PW_LENGTH
|
||||
#define SNMPV3_MAX_ENGINE_ELEMS 32 // 32 * 4 = 128 > 34, also has to be multiple of SNMPV3_MAX_PW_LENGTH
|
||||
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
|
||||
|
||||
#define SNMPV3_MAX_PW_LENGTH_OPT 32
|
||||
#define SNMPV3_TMP_ELEMS_OPT ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4)
|
||||
// (32 * 128) / 4 = 1024
|
||||
// for pw length > 32 we use global memory reads
|
||||
|
||||
typedef struct hmac_sha512_tmp
|
||||
{
|
||||
u32 tmp[SNMPV3_TMP_ELEMS];
|
||||
u64 h[SNMPV3_HASH_ELEMS];
|
||||
|
||||
} hmac_sha512_tmp_t;
|
||||
|
||||
typedef struct snmpv3
|
||||
{
|
||||
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
|
||||
u32 salt_len;
|
||||
|
||||
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
|
||||
u32 engineID_len;
|
||||
|
||||
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
|
||||
|
||||
} snmpv3_t;
|
||||
|
||||
KERNEL_FQ void m27300_init (KERN_ATTR_TMPS_ESALT (hmac_sha512_tmp_t, snmpv3_t))
{
  /**
   * Expands the candidate into tmps[gid].tmp as 128 back-to-back copies
   * (128 * pw_len bytes, stored as byte-swapped 32-bit words) and seeds
   * tmps[gid].h with the SHA-512 initial values.
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base: private copy of the password so it can be addressed byte-wise
   */

  const u32 pw_len = pws[gid].pw_len;

  u32 w[128] = { 0 };

  u32 word = 0;

  while ((word * 4) < pw_len)
  {
    w[word] = pws[gid].i[word];

    word++;
  }

  u8 *pw_bytes = (u8 *) w;

  // 128-byte staging block, flushed to global memory each time it fills up

  u32 block[32];

  u8 *block_bytes = (u8 *) block;

  int written = 0; // total number of bytes produced so far

  for (int rep = 0; rep < 128; rep++)
  {
    for (u32 pos = 0; pos < pw_len; pos++)
    {
      const int slot = written & 127;

      block_bytes[slot] = pw_bytes[pos];

      if (slot == 127)
      {
        // (written - 127) is the byte offset at which this block started
        const int base = (written - 127) / 4;

        for (int k = 0; k < 32; k++)
        {
          tmps[gid].tmp[base + k] = hc_swap32_S (block[k]);
        }
      }

      written++;
    }
  }

  // hash: start from the SHA-512 initial state

  const u64 sha512_iv[8] =
  {
    SHA512M_A,
    SHA512M_B,
    SHA512M_C,
    SHA512M_D,
    SHA512M_E,
    SHA512M_F,
    SHA512M_G,
    SHA512M_H
  };

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = sha512_iv[k];
  }
}
|
||||
|
||||
KERNEL_FQ void m27300_loop (KERN_ATTR_TMPS_ESALT (hmac_sha512_tmp_t, snmpv3_t))
{
  /**
   * Advances the running SHA-512 state by loop_cnt bytes of the
   * repeated-password stream, 128 bytes per transform. The block for stream
   * offset j starts at byte (j % (128 * pw_len)) of tmps[gid].tmp.
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u64 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  const u32 pw_len = pws[gid].pw_len;

  // length in bytes of the buffer filled by m27300_init
  // NOTE(review): pw_len == 0 would make the modulus below zero - presumably
  // excluded by the host-side minimum password length; confirm
  const int pw_len128 = pw_len * 128;

  if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
  {
    // short passwords: work on a private copy of the expanded buffer

    u32 tmp[SNMPV3_TMP_ELEMS_OPT];

    for (int i = 0; i < pw_len128 / 4; i++)
    {
      tmp[i] = tmps[gid].tmp[i];
    }

    for (u32 done = 0, off = loop_pos; done < loop_cnt; done += 128, off += 128)
    {
      // word index of the current 128-byte block inside the expanded buffer
      const int idx = (off % pw_len128) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];
      u32 w4[4];
      u32 w5[4];
      u32 w6[4];
      u32 w7[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmp[idx +  0 + k];
        w1[k] = tmp[idx +  4 + k];
        w2[k] = tmp[idx +  8 + k];
        w3[k] = tmp[idx + 12 + k];
        w4[k] = tmp[idx + 16 + k];
        w5[k] = tmp[idx + 20 + k];
        w6[k] = tmp[idx + 24 + k];
        w7[k] = tmp[idx + 28 + k];
      }

      sha512_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
    }
  }
  else
  {
    // longer passwords: read each block straight from tmps[gid].tmp

    for (u32 done = 0, off = loop_pos; done < loop_cnt; done += 128, off += 128)
    {
      // word index of the current 128-byte block inside the expanded buffer
      const int idx = (off % pw_len128) / 4;

      u32 w0[4];
      u32 w1[4];
      u32 w2[4];
      u32 w3[4];
      u32 w4[4];
      u32 w5[4];
      u32 w6[4];
      u32 w7[4];

      for (int k = 0; k < 4; k++)
      {
        w0[k] = tmps[gid].tmp[idx +  0 + k];
        w1[k] = tmps[gid].tmp[idx +  4 + k];
        w2[k] = tmps[gid].tmp[idx +  8 + k];
        w3[k] = tmps[gid].tmp[idx + 12 + k];
        w4[k] = tmps[gid].tmp[idx + 16 + k];
        w5[k] = tmps[gid].tmp[idx + 20 + k];
        w6[k] = tmps[gid].tmp[idx + 24 + k];
        w7[k] = tmps[gid].tmp[idx + 28 + k];
      }

      sha512_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
    }
  }

  // persist the state for the next loop invocation / the comp kernel

  for (int k = 0; k < 8; k++)
  {
    tmps[gid].h[k] = h[k];
  }
}
|
||||
|
||||
KERNEL_FQ void m27300_comp (KERN_ATTR_TMPS_ESALT (hmac_sha512_tmp_t, snmpv3_t))
{
  /**
   * Final stage:
   *  1. close the running SHA-512 with a padding block carrying the bit
   *     length of 1048576 bytes
   *  2. SHA-512 over (digest . engineID . digest)
   *  3. HMAC-SHA512 keyed with the result of step 2 over esalt salt_buf
   *  4. compare words taken from the first two 64-bit HMAC output values
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // padding block: 0x80 marker, zeros, message length in bits in the last word

  u32 w0[4] = { 0x80000000, 0, 0, 0 };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };
  u32 w4[4] = { 0, 0, 0, 0 };
  u32 w5[4] = { 0, 0, 0, 0 };
  u32 w6[4] = { 0, 0, 0, 0 };
  u32 w7[4] = { 0, 0, 0, 1048576 * 8 };

  u64 h[8];

  for (int k = 0; k < 8; k++)
  {
    h[k] = tmps[gid].h[k];
  }

  sha512_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);

  // second hash: digest . engineID . digest

  sha512_ctx_t ctx;

  sha512_init (&ctx);

  u32 w[32];

  // 64-byte digest as high/low 32-bit halves, upper half of w zeroed

  for (int k = 0; k < 8; k++)
  {
    w[2 * k + 0] = h32_from_64_S (h[k]);
    w[2 * k + 1] = l32_from_64_S (h[k]);
  }

  for (int k = 16; k < 32; k++)
  {
    w[k] = 0;
  }

  sha512_update (&ctx, w, 64);

  sha512_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);

  // rebuild w with the same digest words for the trailing copy

  for (int k = 0; k < 8; k++)
  {
    w[2 * k + 0] = h32_from_64_S (h[k]);
    w[2 * k + 1] = l32_from_64_S (h[k]);
  }

  for (int k = 16; k < 32; k++)
  {
    w[k] = 0;
  }

  sha512_update (&ctx, w, 64);

  sha512_final (&ctx);

  // the 64-byte result becomes the HMAC key

  for (int k = 0; k < 8; k++)
  {
    w[2 * k + 0] = h32_from_64_S (ctx.h[k]);
    w[2 * k + 1] = l32_from_64_S (ctx.h[k]);
  }

  for (int k = 16; k < 32; k++)
  {
    w[k] = 0;
  }

  sha512_hmac_ctx_t hmac_ctx;

  sha512_hmac_init (&hmac_ctx, w, 64);

  sha512_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);

  sha512_hmac_final (&hmac_ctx);

  // compare words: low/high halves of h[1], then low/high halves of h[0]

  const u32 r0 = l32_from_64 (hmac_ctx.opad.h[1]);
  const u32 r1 = h32_from_64 (hmac_ctx.opad.h[1]);
  const u32 r2 = l32_from_64 (hmac_ctx.opad.h[0]);
  const u32 r3 = h32_from_64 (hmac_ctx.opad.h[0]);

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -0,0 +1,351 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#ifdef KERNEL_STATIC
|
||||
#include "inc_vendor.h"
|
||||
#include "inc_types.h"
|
||||
#include "inc_platform.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
#include "inc_hash_sha1.cl"
|
||||
#include "inc_cipher_aes.cl"
|
||||
#endif
|
||||
|
||||
#define COMPARE_S "inc_comp_single.cl"
|
||||
#define COMPARE_M "inc_comp_multi.cl"
|
||||
|
||||
typedef struct pbkdf2_sha1_tmp
|
||||
{
|
||||
u32 ipad[5];
|
||||
u32 opad[5];
|
||||
|
||||
u32 dgst[32];
|
||||
u32 out[32];
|
||||
|
||||
} pbkdf2_sha1_tmp_t;
|
||||
|
||||
typedef struct vmware_vmx
|
||||
{
|
||||
u32 salt_buf[64];
|
||||
u32 iv_buf[4];
|
||||
u32 ct_buf[4];
|
||||
|
||||
} vmware_vmx_t;
|
||||
|
||||
/**
 * One HMAC-SHA1 evaluation from precomputed pad states.
 *
 * w0..w3  in: the already padded single-block message
 *         out: overwritten with the outer-hash message block
 * ipad    SHA-1 state after the inner pad block
 * opad    SHA-1 state after the outer pad block
 * digest  out: the five HMAC result words
 */
DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest)
{
  // inner hash: continue from the ipad state over the caller's block

  for (int k = 0; k < 5; k++)
  {
    digest[k] = ipad[k];
  }

  sha1_transform_vector (w0, w1, w2, w3, digest);

  // outer block: 20-byte inner digest, 0x80 marker, zeros,
  // bit length of (64-byte pad block + 20-byte digest)

  for (int k = 0; k < 4; k++)
  {
    w0[k] = digest[k];
  }

  w1[0] = digest[4];
  w1[1] = 0x80000000;
  w1[2] = 0;
  w1[3] = 0;

  for (int k = 0; k < 4; k++)
  {
    w2[k] = 0;
  }

  for (int k = 0; k < 3; k++)
  {
    w3[k] = 0;
  }

  w3[3] = (64 + 20) * 8;

  // outer hash: continue from the opad state

  for (int k = 0; k < 5; k++)
  {
    digest[k] = opad[k];
  }

  sha1_transform_vector (w0, w1, w2, w3, digest);
}
|
||||
|
||||
KERNEL_FQ void m27400_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, vmware_vmx_t))
{
  /**
   * Keys an HMAC-SHA1 with the candidate, stores its ipad/opad states and
   * computes HMAC (salt . block_index) for block indices 1 and 2. Each
   * result is written to both dgst and out at word offsets 0 and 5.
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  sha1_hmac_ctx_t hmac_base;

  sha1_hmac_init_global_swap (&hmac_base, pws[gid].i, pws[gid].pw_len);

  // keep the pad states for the loop kernel

  for (int k = 0; k < 5; k++)
  {
    tmps[gid].ipad[k] = hmac_base.ipad.h[k];
  }

  for (int k = 0; k < 5; k++)
  {
    tmps[gid].opad[k] = hmac_base.opad.h[k];
  }

  sha1_hmac_update_global_swap (&hmac_base, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len);

  // 5 words per block; the loop covers word offsets 0 and 5

  u32 block_index = 1;

  for (u32 off = 0; off < 8; off += 5, block_index += 1)
  {
    // each block starts from the shared "key + salt" context
    sha1_hmac_ctx_t hmac_blk = hmac_base;

    // 4-byte block index appended to the salt
    u32 w0[4] = { block_index, 0, 0, 0 };
    u32 w1[4] = { 0, 0, 0, 0 };
    u32 w2[4] = { 0, 0, 0, 0 };
    u32 w3[4] = { 0, 0, 0, 0 };

    sha1_hmac_update_64 (&hmac_blk, w0, w1, w2, w3, 4);

    sha1_hmac_final (&hmac_blk);

    for (int k = 0; k < 5; k++)
    {
      tmps[gid].dgst[off + k] = hmac_blk.opad.h[k];
    }

    // the accumulator starts out equal to the first HMAC output
    for (int k = 0; k < 5; k++)
    {
      tmps[gid].out[off + k] = tmps[gid].dgst[off + k];
    }
  }
}
|
||||
|
||||
KERNEL_FQ void m27400_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, vmware_vmx_t))
{
  /**
   * Runs loop_cnt further HMAC-SHA1 iterations for each of the two 5-word
   * output blocks: dgst = HMAC (dgst), out ^= dgst. Both are written back
   * to tmps afterwards.
   */

  const u64 gid = get_global_id (0);

  if ((gid * VECT_SIZE) >= gid_max) return;

  u32x ipad[5];
  u32x opad[5];

  for (int k = 0; k < 5; k++)
  {
    ipad[k] = packv (tmps, ipad, gid, k);
  }

  for (int k = 0; k < 5; k++)
  {
    opad[k] = packv (tmps, opad, gid, k);
  }

  // word offsets 0 and 5, matching the layout written by m27400_init

  for (u32 i = 0; i < 8; i += 5)
  {
    u32x dgst[5];
    u32x out[5];

    for (int k = 0; k < 5; k++)
    {
      dgst[k] = packv (tmps, dgst, gid, i + k);
    }

    for (int k = 0; k < 5; k++)
    {
      out[k] = packv (tmps, out, gid, i + k);
    }

    for (u32 j = 0; j < loop_cnt; j++)
    {
      u32x w0[4];
      u32x w1[4];
      u32x w2[4];
      u32x w3[4];

      // message block: previous 20-byte digest, 0x80 marker, zeros,
      // bit length of (64-byte pad block + 20-byte digest)

      for (int k = 0; k < 4; k++)
      {
        w0[k] = dgst[k];
      }

      w1[0] = dgst[4];
      w1[1] = 0x80000000;
      w1[2] = 0;
      w1[3] = 0;

      for (int k = 0; k < 4; k++)
      {
        w2[k] = 0;
      }

      for (int k = 0; k < 3; k++)
      {
        w3[k] = 0;
      }

      w3[3] = (64 + 20) * 8;

      hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst);

      for (int k = 0; k < 5; k++)
      {
        out[k] ^= dgst[k];
      }
    }

    for (int k = 0; k < 5; k++)
    {
      unpackv (tmps, dgst, gid, i + k, dgst[k]);
    }

    for (int k = 0; k < 5; k++)
    {
      unpackv (tmps, out, gid, i + k, out[k]);
    }
  }
}
|
||||
|
||||
KERNEL_FQ void m27400_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, vmware_vmx_t))
{
  /**
   * Uses the first eight words of the derived key as an AES-256 key,
   * decrypts the stored ciphertext block, XORs it with the stored IV and
   * hands the four plaintext words to the multi-hash compare include.
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id(0);
  const u64 lsz = get_local_size(0);

  /**
   * aes shared: with REAL_SHM the lookup tables are copied into LOCAL_VK
   * arrays, otherwise the s_* names simply alias the constant tables
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_td0[256];
  LOCAL_VK u32 s_td1[256];
  LOCAL_VK u32 s_td2[256];
  LOCAL_VK u32 s_td3[256];
  LOCAL_VK u32 s_td4[256];

  LOCAL_VK u32 s_te0[256];
  LOCAL_VK u32 s_te1[256];
  LOCAL_VK u32 s_te2[256];
  LOCAL_VK u32 s_te3[256];
  LOCAL_VK u32 s_te4[256];

  // each work-item copies the entries lid, lid + lsz, lid + 2 * lsz, ...
  for (u32 entry = lid; entry < 256; entry += lsz)
  {
    s_td0[entry] = td0[entry];
    s_td1[entry] = td1[entry];
    s_td2[entry] = td2[entry];
    s_td3[entry] = td3[entry];
    s_td4[entry] = td4[entry];

    s_te0[entry] = te0[entry];
    s_te1[entry] = te1[entry];
    s_te2[entry] = te2[entry];
    s_te3[entry] = te3[entry];
    s_te4[entry] = te4[entry];
  }

  SYNC_THREADS();

  #else

  CONSTANT_AS u32a* s_td0 = td0;
  CONSTANT_AS u32a* s_td1 = td1;
  CONSTANT_AS u32a* s_td2 = td2;
  CONSTANT_AS u32a* s_td3 = td3;
  CONSTANT_AS u32a* s_td4 = td4;

  CONSTANT_AS u32a* s_te0 = te0;
  CONSTANT_AS u32a* s_te1 = te1;
  CONSTANT_AS u32a* s_te2 = te2;
  CONSTANT_AS u32a* s_te3 = te3;
  CONSTANT_AS u32a* s_te4 = te4;

  #endif

  // the bounds check comes after the table setup, so every work-item
  // takes part in the copy and reaches SYNC_THREADS
  if (gid >= gid_max) return;

  // key: first 32 bytes of the derived output

  u32 ukey[8];

  for (int k = 0; k < 8; k++)
  {
    ukey[k] = tmps[gid].out[k];
  }

  u32 ks[60];

  AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);

  // iv and ct

  u32 iv_buf[4];
  u32 ct_buf[4];

  for (int k = 0; k < 4; k++)
  {
    iv_buf[k] = esalt_bufs[DIGESTS_OFFSET].iv_buf[k];
    ct_buf[k] = esalt_bufs[DIGESTS_OFFSET].ct_buf[k];
  }

  // decrypt first block, then remove the IV

  u32 pt_buf[4];

  AES256_decrypt (ks, ct_buf, pt_buf, s_td0, s_td1, s_td2, s_td3, s_td4);

  for (int k = 0; k < 4; k++)
  {
    pt_buf[k] ^= iv_buf[k];
  }

  // check

  const u32 r0 = pt_buf[0];
  const u32 r1 = pt_buf[1];
  const u32 r2 = pt_buf[2];
  const u32 r3 = pt_buf[3];

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
|
@ -1,2 +1,3 @@
|
||||
web: https://hashcat.net
|
||||
irc: freenode #hashcat
|
||||
discord: https://discord.gg/HFS523HGBT
|
||||
irc: Libera Chat #hashcat
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue