mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-29 18:08:46 +00:00

This affects the inner core of nearly all kernels and thus impacts almost all hash modes. The only functional change is that we now manually unroll the individual steps of the transform() functions, saving a small amount of constant memory. In most cases, JIT compilers would likely detect the unused constant buffer and remove it automatically, but this makes it explicit. Tested on newer NVIDIA devices: no speed change observed. Tested on older NVIDIA devices: visible speed increase. Tested on AMD devices: visible speed increase across all tested GPUs. Not yet tested: CPUs, Intel iGPUs, Intel dGPUs.
732 lines
21 KiB
Common Lisp
732 lines
21 KiB
Common Lisp
/**
|
|
* Author......: See docs/credits.txt
|
|
* License.....: MIT
|
|
*/
|
|
|
|
#define NEW_SIMD_CODE
|
|
|
|
#ifdef KERNEL_STATIC
|
|
#include M2S(INCLUDE_PATH/inc_vendor.h)
|
|
#include M2S(INCLUDE_PATH/inc_types.h)
|
|
#include M2S(INCLUDE_PATH/inc_platform.cl)
|
|
#include M2S(INCLUDE_PATH/inc_common.cl)
|
|
#include M2S(INCLUDE_PATH/inc_simd.cl)
|
|
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
|
|
#endif
|
|
|
|
CONSTANT_VK u32a k_sha256[64] =
|
|
{
|
|
SHA256C00, SHA256C01, SHA256C02, SHA256C03,
|
|
SHA256C04, SHA256C05, SHA256C06, SHA256C07,
|
|
SHA256C08, SHA256C09, SHA256C0a, SHA256C0b,
|
|
SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f,
|
|
SHA256C10, SHA256C11, SHA256C12, SHA256C13,
|
|
SHA256C14, SHA256C15, SHA256C16, SHA256C17,
|
|
SHA256C18, SHA256C19, SHA256C1a, SHA256C1b,
|
|
SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f,
|
|
SHA256C20, SHA256C21, SHA256C22, SHA256C23,
|
|
SHA256C24, SHA256C25, SHA256C26, SHA256C27,
|
|
SHA256C28, SHA256C29, SHA256C2a, SHA256C2b,
|
|
SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f,
|
|
SHA256C30, SHA256C31, SHA256C32, SHA256C33,
|
|
SHA256C34, SHA256C35, SHA256C36, SHA256C37,
|
|
SHA256C38, SHA256C39, SHA256C3a, SHA256C3b,
|
|
SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
|
|
};
|
|
|
|
DECLSPEC void sha256_transform_m (PRIVATE_AS u32x *digest, PRIVATE_AS const u32x *w)
|
|
{
|
|
u32x a = digest[0];
|
|
u32x b = digest[1];
|
|
u32x c = digest[2];
|
|
u32x d = digest[3];
|
|
u32x e = digest[4];
|
|
u32x f = digest[5];
|
|
u32x g = digest[6];
|
|
u32x h = digest[7];
|
|
|
|
u32x w0_t = w[ 0];
|
|
u32x w1_t = w[ 1];
|
|
u32x w2_t = w[ 2];
|
|
u32x w3_t = w[ 3];
|
|
u32x w4_t = w[ 4];
|
|
u32x w5_t = w[ 5];
|
|
u32x w6_t = w[ 6];
|
|
u32x w7_t = w[ 7];
|
|
u32x w8_t = w[ 8];
|
|
u32x w9_t = w[ 9];
|
|
u32x wa_t = w[10];
|
|
u32x wb_t = w[11];
|
|
u32x wc_t = w[12];
|
|
u32x wd_t = w[13];
|
|
u32x we_t = w[14];
|
|
u32x wf_t = w[15];
|
|
|
|
#define ROUND_EXPAND() \
|
|
{ \
|
|
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \
|
|
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \
|
|
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \
|
|
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \
|
|
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \
|
|
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \
|
|
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \
|
|
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \
|
|
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \
|
|
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \
|
|
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \
|
|
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \
|
|
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \
|
|
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \
|
|
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \
|
|
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \
|
|
}
|
|
|
|
#define ROUND_STEP(i) \
|
|
{ \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
|
|
}
|
|
|
|
ROUND_STEP (0);
|
|
|
|
#if defined IS_CUDA
|
|
ROUND_EXPAND (); ROUND_STEP (16);
|
|
ROUND_EXPAND (); ROUND_STEP (32);
|
|
ROUND_EXPAND (); ROUND_STEP (48);
|
|
#else
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i += 16)
|
|
{
|
|
ROUND_EXPAND (); ROUND_STEP (i);
|
|
}
|
|
#endif
|
|
|
|
digest[0] += a;
|
|
digest[1] += b;
|
|
digest[2] += c;
|
|
digest[3] += d;
|
|
digest[4] += e;
|
|
digest[5] += f;
|
|
digest[6] += g;
|
|
digest[7] += h;
|
|
}
|
|
|
|
DECLSPEC void sha256_transform_z (PRIVATE_AS u32x *digest)
|
|
{
|
|
u32x a = digest[0];
|
|
u32x b = digest[1];
|
|
u32x c = digest[2];
|
|
u32x d = digest[3];
|
|
u32x e = digest[4];
|
|
u32x f = digest[5];
|
|
u32x g = digest[6];
|
|
u32x h = digest[7];
|
|
|
|
#define ROUND_STEP_Z(i) \
|
|
{ \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, 0, k_sha256[i + 0]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, 0, k_sha256[i + 1]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, 0, k_sha256[i + 2]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, 0, k_sha256[i + 3]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, 0, k_sha256[i + 4]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, 0, k_sha256[i + 5]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, 0, k_sha256[i + 6]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, 0, k_sha256[i + 7]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, 0, k_sha256[i + 8]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, 0, k_sha256[i + 9]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, 0, k_sha256[i + 10]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, 0, k_sha256[i + 11]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, 0, k_sha256[i + 12]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, 0, k_sha256[i + 13]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, 0, k_sha256[i + 14]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, 0, k_sha256[i + 15]); \
|
|
}
|
|
|
|
ROUND_STEP_Z (0);
|
|
|
|
#if defined IS_CUDA
|
|
ROUND_STEP_Z (16);
|
|
ROUND_STEP_Z (32);
|
|
ROUND_STEP_Z (48);
|
|
#else
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i += 16)
|
|
{
|
|
ROUND_STEP_Z (i);
|
|
}
|
|
#endif
|
|
|
|
digest[0] += a;
|
|
digest[1] += b;
|
|
digest[2] += c;
|
|
digest[3] += d;
|
|
digest[4] += e;
|
|
digest[5] += f;
|
|
digest[6] += g;
|
|
digest[7] += h;
|
|
}
|
|
|
|
DECLSPEC void sha256_transform_s (PRIVATE_AS u32x *digest, LOCAL_AS u32 *w)
|
|
{
|
|
u32x a = digest[0];
|
|
u32x b = digest[1];
|
|
u32x c = digest[2];
|
|
u32x d = digest[3];
|
|
u32x e = digest[4];
|
|
u32x f = digest[5];
|
|
u32x g = digest[6];
|
|
u32x h = digest[7];
|
|
|
|
#define ROUND_STEP_S(i) \
|
|
{ \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w[i + 0], k_sha256[i + 0]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w[i + 1], k_sha256[i + 1]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w[i + 2], k_sha256[i + 2]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w[i + 3], k_sha256[i + 3]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w[i + 4], k_sha256[i + 4]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w[i + 5], k_sha256[i + 5]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w[i + 6], k_sha256[i + 6]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w[i + 7], k_sha256[i + 7]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w[i + 8], k_sha256[i + 8]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w[i + 9], k_sha256[i + 9]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w[i + 10], k_sha256[i + 10]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w[i + 11], k_sha256[i + 11]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w[i + 12], k_sha256[i + 12]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w[i + 13], k_sha256[i + 13]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w[i + 14], k_sha256[i + 14]); \
|
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w[i + 15], k_sha256[i + 15]); \
|
|
}
|
|
|
|
ROUND_STEP_S (0);
|
|
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i += 16)
|
|
{
|
|
ROUND_STEP_S (i);
|
|
}
|
|
|
|
digest[0] += a;
|
|
digest[1] += b;
|
|
digest[2] += c;
|
|
digest[3] += d;
|
|
digest[4] += e;
|
|
digest[5] += f;
|
|
digest[6] += g;
|
|
digest[7] += h;
|
|
}
|
|
|
|
DECLSPEC void m08000m (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
|
|
{
|
|
/**
|
|
* modifiers are taken from args
|
|
*/
|
|
|
|
/**
|
|
* salt
|
|
*/
|
|
|
|
const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 0]);
|
|
const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 1]);
|
|
const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 2]); // 0x80
|
|
|
|
/**
|
|
* precompute final msg blocks
|
|
*/
|
|
|
|
for (u32 i = lid; i < 64; i += lsz)
|
|
{
|
|
w_s1[i] = 0;
|
|
w_s2[i] = 0;
|
|
}
|
|
|
|
SYNC_THREADS ();
|
|
|
|
if (lid == 0)
|
|
{
|
|
w_s1[15] = 0 | salt_buf0 >> 16;
|
|
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i++)
|
|
{
|
|
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
|
|
}
|
|
|
|
w_s2[ 0] = salt_buf0 << 16 | salt_buf1 >> 16;
|
|
w_s2[ 1] = salt_buf1 << 16 | salt_buf2 >> 16;
|
|
w_s2[ 2] = salt_buf2 << 16 | 0;
|
|
w_s2[15] = (510 + 8) * 8;
|
|
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i++)
|
|
{
|
|
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
|
|
}
|
|
}
|
|
|
|
SYNC_THREADS ();
|
|
|
|
if (gid >= GID_CNT) return;
|
|
|
|
/**
|
|
* modifier
|
|
*/
|
|
|
|
u32x w_t[16];
|
|
|
|
w_t[ 0] = w[ 0] >> 8;
|
|
w_t[ 1] = w[ 1] >> 8;
|
|
w_t[ 2] = w[ 2] >> 8;
|
|
w_t[ 3] = w[ 3] >> 8;
|
|
w_t[ 4] = w[ 4] >> 8;
|
|
w_t[ 5] = w[ 5] >> 8;
|
|
w_t[ 6] = w[ 6] >> 8;
|
|
w_t[ 7] = w[ 7] >> 8;
|
|
w_t[ 8] = w[ 8] >> 8;
|
|
w_t[ 9] = w[ 9] >> 8;
|
|
w_t[10] = w[10] >> 8;
|
|
w_t[11] = w[11] >> 8;
|
|
w_t[12] = w[12] >> 8;
|
|
w_t[13] = w[13] >> 8;
|
|
w_t[14] = w[14] >> 8;
|
|
w_t[15] = w[15] >> 8;
|
|
|
|
/**
|
|
* loop
|
|
*/
|
|
|
|
u32 w0l = w[0];
|
|
|
|
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
|
|
{
|
|
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
|
|
|
|
const u32x w0lr = w0l | w0r;
|
|
|
|
w_t[0] = w0lr >> 8;
|
|
|
|
u32x digest[8];
|
|
|
|
digest[0] = SHA256M_A;
|
|
digest[1] = SHA256M_B;
|
|
digest[2] = SHA256M_C;
|
|
digest[3] = SHA256M_D;
|
|
digest[4] = SHA256M_E;
|
|
digest[5] = SHA256M_F;
|
|
digest[6] = SHA256M_G;
|
|
digest[7] = SHA256M_H;
|
|
|
|
sha256_transform_m (digest, w_t); // 0 - 64
|
|
sha256_transform_z (digest); // 64 - 128
|
|
sha256_transform_z (digest); // 128 - 192
|
|
sha256_transform_z (digest); // 192 - 256
|
|
sha256_transform_z (digest); // 256 - 320
|
|
sha256_transform_z (digest); // 320 - 384
|
|
sha256_transform_z (digest); // 384 - 448
|
|
sha256_transform_s (digest, w_s1); // 448 - 512
|
|
sha256_transform_s (digest, w_s2); // 512 - 576
|
|
|
|
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
|
}
|
|
}
|
|
|
|
DECLSPEC void m08000s (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
|
|
{
|
|
/**
|
|
* modifiers are taken from args
|
|
*/
|
|
|
|
/**
|
|
* salt
|
|
*/
|
|
|
|
const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 0]);
|
|
const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 1]);
|
|
const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 2]); // 0x80
|
|
|
|
/**
|
|
* precompute final msg blocks
|
|
*/
|
|
|
|
for (u32 i = lid; i < 64; i += lsz)
|
|
{
|
|
w_s1[i] = 0;
|
|
w_s2[i] = 0;
|
|
}
|
|
|
|
SYNC_THREADS ();
|
|
|
|
if (lid == 0)
|
|
{
|
|
w_s1[15] = 0 | salt_buf0 >> 16;
|
|
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i++)
|
|
{
|
|
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
|
|
}
|
|
|
|
w_s2[ 0] = salt_buf0 << 16 | salt_buf1 >> 16;
|
|
w_s2[ 1] = salt_buf1 << 16 | salt_buf2 >> 16;
|
|
w_s2[ 2] = salt_buf2 << 16 | 0;
|
|
w_s2[15] = (510 + 8) * 8;
|
|
|
|
#ifdef _unroll
|
|
#pragma unroll
|
|
#endif
|
|
for (int i = 16; i < 64; i++)
|
|
{
|
|
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
|
|
}
|
|
}
|
|
|
|
SYNC_THREADS ();
|
|
|
|
if (gid >= GID_CNT) return;
|
|
|
|
/**
|
|
* modifier
|
|
*/
|
|
|
|
u32x w_t[16];
|
|
|
|
w_t[ 0] = w[ 0] >> 8;
|
|
w_t[ 1] = w[ 1] >> 8;
|
|
w_t[ 2] = w[ 2] >> 8;
|
|
w_t[ 3] = w[ 3] >> 8;
|
|
w_t[ 4] = w[ 4] >> 8;
|
|
w_t[ 5] = w[ 5] >> 8;
|
|
w_t[ 6] = w[ 6] >> 8;
|
|
w_t[ 7] = w[ 7] >> 8;
|
|
w_t[ 8] = w[ 8] >> 8;
|
|
w_t[ 9] = w[ 9] >> 8;
|
|
w_t[10] = w[10] >> 8;
|
|
w_t[11] = w[11] >> 8;
|
|
w_t[12] = w[12] >> 8;
|
|
w_t[13] = w[13] >> 8;
|
|
w_t[14] = w[14] >> 8;
|
|
w_t[15] = w[15] >> 8;
|
|
|
|
/**
|
|
* digest
|
|
*/
|
|
|
|
const u32 search[4] =
|
|
{
|
|
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
|
|
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
|
|
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
|
|
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
|
|
};
|
|
|
|
/**
|
|
* loop
|
|
*/
|
|
|
|
u32 w0l = w[0];
|
|
|
|
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
|
|
{
|
|
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
|
|
|
|
const u32x w0lr = w0l | w0r;
|
|
|
|
w_t[0] = w0lr >> 8;
|
|
|
|
u32x digest[8];
|
|
|
|
digest[0] = SHA256M_A;
|
|
digest[1] = SHA256M_B;
|
|
digest[2] = SHA256M_C;
|
|
digest[3] = SHA256M_D;
|
|
digest[4] = SHA256M_E;
|
|
digest[5] = SHA256M_F;
|
|
digest[6] = SHA256M_G;
|
|
digest[7] = SHA256M_H;
|
|
|
|
sha256_transform_m (digest, w_t); // 0 - 64
|
|
sha256_transform_z (digest); // 64 - 128
|
|
sha256_transform_z (digest); // 128 - 192
|
|
sha256_transform_z (digest); // 192 - 256
|
|
sha256_transform_z (digest); // 256 - 320
|
|
sha256_transform_z (digest); // 320 - 384
|
|
sha256_transform_z (digest); // 384 - 448
|
|
sha256_transform_s (digest, w_s1); // 448 - 512
|
|
sha256_transform_s (digest, w_s2); // 512 - 576
|
|
|
|
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
|
}
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_m04 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = 0;
|
|
w[ 5] = 0;
|
|
w[ 6] = 0;
|
|
w[ 7] = 0;
|
|
w[ 8] = 0;
|
|
w[ 9] = 0;
|
|
w[10] = 0;
|
|
w[11] = 0;
|
|
w[12] = 0;
|
|
w[13] = 0;
|
|
w[14] = 0;
|
|
w[15] = 0;
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_m08 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = pws[gid].i[ 4];
|
|
w[ 5] = pws[gid].i[ 5];
|
|
w[ 6] = pws[gid].i[ 6];
|
|
w[ 7] = pws[gid].i[ 7];
|
|
w[ 8] = 0;
|
|
w[ 9] = 0;
|
|
w[10] = 0;
|
|
w[11] = 0;
|
|
w[12] = 0;
|
|
w[13] = 0;
|
|
w[14] = 0;
|
|
w[15] = 0;
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_m16 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = pws[gid].i[ 4];
|
|
w[ 5] = pws[gid].i[ 5];
|
|
w[ 6] = pws[gid].i[ 6];
|
|
w[ 7] = pws[gid].i[ 7];
|
|
w[ 8] = pws[gid].i[ 8];
|
|
w[ 9] = pws[gid].i[ 9];
|
|
w[10] = pws[gid].i[10];
|
|
w[11] = pws[gid].i[11];
|
|
w[12] = pws[gid].i[12];
|
|
w[13] = pws[gid].i[13];
|
|
w[14] = pws[gid].i[14];
|
|
w[15] = pws[gid].i[15];
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_s04 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = 0;
|
|
w[ 5] = 0;
|
|
w[ 6] = 0;
|
|
w[ 7] = 0;
|
|
w[ 8] = 0;
|
|
w[ 9] = 0;
|
|
w[10] = 0;
|
|
w[11] = 0;
|
|
w[12] = 0;
|
|
w[13] = 0;
|
|
w[14] = 0;
|
|
w[15] = 0;
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_s08 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = pws[gid].i[ 4];
|
|
w[ 5] = pws[gid].i[ 5];
|
|
w[ 6] = pws[gid].i[ 6];
|
|
w[ 7] = pws[gid].i[ 7];
|
|
w[ 8] = 0;
|
|
w[ 9] = 0;
|
|
w[10] = 0;
|
|
w[11] = 0;
|
|
w[12] = 0;
|
|
w[13] = 0;
|
|
w[14] = 0;
|
|
w[15] = 0;
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|
|
|
|
KERNEL_FQ KERNEL_FA void m08000_s16 (KERN_ATTR_VECTOR ())
|
|
{
|
|
/**
|
|
* base
|
|
*/
|
|
|
|
const u64 lid = get_local_id (0);
|
|
const u64 gid = get_global_id (0);
|
|
const u64 lsz = get_local_size (0);
|
|
|
|
u32 w[16];
|
|
|
|
w[ 0] = pws[gid].i[ 0];
|
|
w[ 1] = pws[gid].i[ 1];
|
|
w[ 2] = pws[gid].i[ 2];
|
|
w[ 3] = pws[gid].i[ 3];
|
|
w[ 4] = pws[gid].i[ 4];
|
|
w[ 5] = pws[gid].i[ 5];
|
|
w[ 6] = pws[gid].i[ 6];
|
|
w[ 7] = pws[gid].i[ 7];
|
|
w[ 8] = pws[gid].i[ 8];
|
|
w[ 9] = pws[gid].i[ 9];
|
|
w[10] = pws[gid].i[10];
|
|
w[11] = pws[gid].i[11];
|
|
w[12] = pws[gid].i[12];
|
|
w[13] = pws[gid].i[13];
|
|
w[14] = pws[gid].i[14];
|
|
w[15] = pws[gid].i[15];
|
|
|
|
const u32 pw_len = pws[gid].pw_len & 63;
|
|
|
|
LOCAL_VK u32 w_s1[64];
|
|
LOCAL_VK u32 w_s2[64];
|
|
|
|
/**
|
|
* main
|
|
*/
|
|
|
|
m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
|
|
}
|