Merge pull request #1 from hashcat/master

Merge up with master
pull/2181/head
Chick3nman 5 years ago committed by GitHub
commit 3b4fdfc32b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

1
.gitignore vendored

@ -18,6 +18,7 @@ kernels/**
lib/*.a
modules/*.dll
modules/*.so
obj/*/*/*.o
obj/*.o
obj/*.a
include/CL

@ -127,6 +127,7 @@ DECLSPEC void sha512_hmac_init (sha512_hmac_ctx_t *ctx, const u32 *w, const int
DECLSPEC void sha512_hmac_init_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len);
DECLSPEC void sha512_hmac_init_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void sha512_hmac_update_128 (sha512_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len);
DECLSPEC void sha512_hmac_update (sha512_hmac_ctx_t *ctx, const u32 *w, const int len);
DECLSPEC void sha512_hmac_update_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len);

File diff suppressed because it is too large Load Diff

@ -0,0 +1,276 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m04710_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_update (&ctx, s, salt_len);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m04710_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_update (&ctx, s, salt_len);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,270 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m04710_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_update (&ctx, s, salt_len);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m04710_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_update (&ctx, s, salt_len);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,296 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m04710_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32x s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha1_ctx_vector_t ctx;
sha1_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha1_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha1_update_vector (&ctx, s, salt_len);
sha1_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m04710_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32x s[8] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha1_ctx_vector_t ctx;
sha1_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha1_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha1_update_vector (&ctx, s, salt_len);
sha1_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -32,7 +32,7 @@ inline __device__ uint4 operator >> (const uint4 a, const u32 b) { return mak
inline __device__ uint4 operator + (const uint4 a, const uint4 b) { return make_uint4 ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); }
inline __device__ uint4 operator ^ (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
inline __device__ uint4 operator | (const uint4 a, const uint4 b) { return make_uint4 ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
inline __device__ uint4 operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
inline __device__ void operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
inline __device__ uint4 rotate (const uint4 a, const int n)
{

@ -1019,6 +1019,51 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t))
KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m09700_m16 (KERN_ATTR_ESALT (oldoffice01_t))
@ -1076,6 +1121,51 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t))
KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
LOCAL_VK RC4_KEY rc4_keys[64];
m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m09700_s16 (KERN_ATTR_ESALT (oldoffice01_t))

@ -608,6 +608,49 @@ KERNEL_FQ void m09720_m04 (KERN_ATTR_ESALT (oldoffice01_t))
KERNEL_FQ void m09720_m08 (KERN_ATTR_ESALT (oldoffice01_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m09720_m16 (KERN_ATTR_ESALT (oldoffice01_t))
@ -663,6 +706,49 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_ESALT (oldoffice01_t))
KERNEL_FQ void m09720_s08 (KERN_ATTR_ESALT (oldoffice01_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m09720_s16 (KERN_ATTR_ESALT (oldoffice01_t))

@ -32,11 +32,11 @@ inline __device__ uint4 operator >> (const uint4 a, const u32 b) { return mak
inline __device__ uint4 operator + (const uint4 a, const uint4 b) { return make_uint4 ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); }
inline __device__ uint4 operator ^ (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
inline __device__ uint4 operator | (const uint4 a, const uint4 b) { return make_uint4 ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
inline __device__ uint4 operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
inline __device__ void operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
inline __device__ uint4 rotate (const uint4 a, const int n)
{
return ((a >> n) | ((a >> (32 - n))));
return ((a << n) | ((a >> (32 - n))));
}
#endif

@ -150,10 +150,10 @@ KERNEL_FQ void m20600_comp (KERN_ATTR_TMPS (omt_sha256_tmp_t))
* digest
*/
const u32x r0 = hc_swap32_S (tmps[gid].digest_buf[DGST_R0]);
const u32x r1 = hc_swap32_S (tmps[gid].digest_buf[DGST_R1]);
const u32x r2 = hc_swap32_S (tmps[gid].digest_buf[DGST_R2]);
const u32x r3 = hc_swap32_S (tmps[gid].digest_buf[DGST_R3]);
const u32 r0 = hc_swap32_S (tmps[gid].digest_buf[DGST_R0]);
const u32 r1 = hc_swap32_S (tmps[gid].digest_buf[DGST_R1]);
const u32 r2 = hc_swap32_S (tmps[gid].digest_buf[DGST_R2]);
const u32 r3 = hc_swap32_S (tmps[gid].digest_buf[DGST_R3]);
#define il_pos 0

File diff suppressed because it is too large Load Diff

@ -0,0 +1,265 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20710_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20710_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,259 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20710_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 s[64] = { 0 };
const u32 salt_len = salt_bufs[salt_pos].salt_len;
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20710_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
const u32 f = ctx0.h[5];
const u32 g = ctx0.h[6];
const u32 h = ctx0.h[7];
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_64 (&ctx, w0, w1, w2, w3, 64);
sha256_update (&ctx, s, salt_len);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,285 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20710_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32x s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
const u32x f = ctx0.h[5];
const u32x g = ctx0.h[6];
const u32x h = ctx0.h[7];
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
_w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
_w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
_w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
_w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
_w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
_w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
_w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
_w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64);
sha256_update_vector (&ctx, s, salt_len);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20710_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32x s[64] = { 0 };
for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
const u32x f = ctx0.h[5];
const u32x g = ctx0.h[6];
const u32x h = ctx0.h[7];
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16;
_w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
_w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16;
_w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16;
_w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16;
_w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16;
_w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16;
_w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16;
_w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16;
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64);
sha256_update_vector (&ctx, s, salt_len);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,619 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
KERNEL_FQ void m20800_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
w3[2] = out_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m20800_m08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m20800_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m20800_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
w3[2] = out_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m20800_s08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m20800_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,244 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20800_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha256_ctx_t ctx;
sha256_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20800_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha256_ctx_t ctx;
sha256_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,727 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
KERNEL_FQ void m20800_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = pw_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = pw_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,238 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20800_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha256_ctx_t ctx;
sha256_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20800_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha256_ctx_t ctx;
sha256_init (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,967 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
DECLSPEC void m20800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x w0_t = w0lr;
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
DECLSPEC void m20800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x w0_t = w0lr;
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
u32x f = 0;
u32x g = 0;
u32x h = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha256
*/
w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m20800_m04 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = pws[gid].i[15];
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}
KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = pws[gid].i[15];
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}
KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = pws[gid].i[14];
w3[3] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}
KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = pws[gid].i[15];
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}
KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = pws[gid].i[15];
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}
KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* modifier
*/
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = pws[gid].i[14];
w3[3] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc);
}

@ -0,0 +1,264 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha256.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20800_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20800_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
ctx.w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
ctx.w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
ctx.w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
ctx.w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
ctx.w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
ctx.w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
ctx.w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
ctx.w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
ctx.w2[0] = 0;
ctx.w2[1] = 0;
ctx.w2[2] = 0;
ctx.w2[3] = 0;
ctx.w3[0] = 0;
ctx.w3[1] = 0;
ctx.w3[2] = 0;
ctx.w3[3] = 0;
ctx.len = 32;
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,402 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20900_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx1;
md5_init (&ctx1);
md5_update (&ctx1, tmp.i, tmp.pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20900_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx1;
md5_init (&ctx1);
md5_update (&ctx1, tmp.i, tmp.pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,404 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20900_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha1_ctx_t ctx0 = ctx00;
sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx1 = ctx11;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20900_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha1_ctx_t ctx0 = ctx00;
sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx1 = ctx11;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,422 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m20900_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_ctx_vector_t ctx0;
sha1_init_vector (&ctx0);
sha1_update_vector_swap (&ctx0, w, pw_len);
sha1_final_vector (&ctx0);
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m20900_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_ctx_vector_t ctx0;
sha1_init_vector (&ctx0);
sha1_update_vector_swap (&ctx0, w, pw_len);
sha1_final_vector (&ctx0);
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,436 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_sha512.cl"
#endif
DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
{
u32x wend = 0x80000000;
u32x wlen = 64*8;
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = hl32_to_64 (w3[2], w3[3]);
u64x w8_t = hl32_to_64 (wend, 0);
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (0, wlen);
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
}
#define ROUND_STEP(i) \
{ \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
}
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
digest[5] += f;
digest[6] += g;
digest[7] += h;
*/
digest[0] = a;
digest[1] = b;
digest[2] = c;
digest[3] = d;
digest[4] = e;
digest[5] = f;
digest[6] = g;
digest[7] = h;
}
KERNEL_FQ void m21000_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha512
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
u32x w4_t[4];
u32x w5_t[4];
u32x w6_t[4];
u32x w7_t[4];
w0_t[0] = hc_swap32 (w0[0]);
w0_t[1] = hc_swap32 (w0[1]);
w0_t[2] = hc_swap32 (w0[2]);
w0_t[3] = hc_swap32 (w0[3]);
w1_t[0] = hc_swap32 (w1[0]);
w1_t[1] = hc_swap32 (w1[1]);
w1_t[2] = hc_swap32 (w1[2]);
w1_t[3] = hc_swap32 (w1[3]);
w2_t[0] = hc_swap32 (w2[0]);
w2_t[1] = hc_swap32 (w2[1]);
w2_t[2] = hc_swap32 (w2[2]);
w2_t[3] = hc_swap32 (w2[3]);
w3_t[0] = hc_swap32 (w3[0]);
w3_t[1] = hc_swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = 0;
w4_t[0] = 0;
w4_t[1] = 0;
w4_t[2] = 0;
w4_t[3] = 0;
w5_t[0] = 0;
w5_t[1] = 0;
w5_t[2] = 0;
w5_t[3] = 0;
w6_t[0] = 0;
w6_t[1] = 0;
w6_t[2] = 0;
w6_t[3] = 0;
w7_t[0] = 0;
w7_t[1] = 0;
w7_t[2] = 0;
w7_t[3] = out_len * 8;
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_vector (w0_t, w1_t, w2_t, w3_t, w4_t, w5_t, w6_t, w7_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_m08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21000_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21000_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha512
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
u32x w4_t[4];
u32x w5_t[4];
u32x w6_t[4];
u32x w7_t[4];
w0_t[0] = hc_swap32 (w0[0]);
w0_t[1] = hc_swap32 (w0[1]);
w0_t[2] = hc_swap32 (w0[2]);
w0_t[3] = hc_swap32 (w0[3]);
w1_t[0] = hc_swap32 (w1[0]);
w1_t[1] = hc_swap32 (w1[1]);
w1_t[2] = hc_swap32 (w1[2]);
w1_t[3] = hc_swap32 (w1[3]);
w2_t[0] = hc_swap32 (w2[0]);
w2_t[1] = hc_swap32 (w2[1]);
w2_t[2] = hc_swap32 (w2[2]);
w2_t[3] = hc_swap32 (w2[3]);
w3_t[0] = hc_swap32 (w3[0]);
w3_t[1] = hc_swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = 0;
w4_t[0] = 0;
w4_t[1] = 0;
w4_t[2] = 0;
w4_t[3] = 0;
w5_t[0] = 0;
w5_t[1] = 0;
w5_t[2] = 0;
w5_t[3] = 0;
w6_t[0] = 0;
w6_t[1] = 0;
w6_t[2] = 0;
w6_t[3] = 0;
w7_t[0] = 0;
w7_t[1] = 0;
w7_t[2] = 0;
w7_t[3] = out_len * 8;
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_vector (w0_t, w1_t, w2_t, w3_t, w4_t, w5_t, w6_t, w7_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_s08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21000_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,171 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha512.cl"
#endif
KERNEL_FQ void m21000_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha512_ctx_t ctx0;
sha512_init (&ctx0);
sha512_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha512_final (&ctx0);
sha512_ctx_t ctx;
sha512_init (&ctx);
u32 final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update (&ctx, final, 64);
sha512_final (&ctx);
const u32 r0 = l32_from_64_S (ctx.h[7]);
const u32 r1 = h32_from_64_S (ctx.h[7]);
const u32 r2 = l32_from_64_S (ctx.h[3]);
const u32 r3 = h32_from_64_S (ctx.h[3]);
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha512_ctx_t ctx0;
sha512_init (&ctx0);
sha512_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha512_final (&ctx0);
sha512_ctx_t ctx;
sha512_init (&ctx);
u32 final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update (&ctx, final, 64);
sha512_final (&ctx);
const u32 r0 = l32_from_64_S (ctx.h[7]);
const u32 r1 = h32_from_64_S (ctx.h[7]);
const u32 r2 = l32_from_64_S (ctx.h[3]);
const u32 r3 = h32_from_64_S (ctx.h[3]);
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,605 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha512.cl"
#endif
DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
{
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
}
#define ROUND_STEP(i) \
{ \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
}
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
digest[5] += f;
digest[6] += g;
digest[7] += h;
}
DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
{
u32x wend = 0x80000000;
u32x wlen = 64*8;
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = hl32_to_64 (w3[2], w3[3]);
u64x w8_t = hl32_to_64 (wend, 0);;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (0, wlen);
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
}
#define ROUND_STEP(i) \
{ \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
}
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
digest[5] += f;
digest[6] += g;
digest[7] += h;
*/
digest[0] = a;
digest[1] = b;
digest[2] = c;
digest[3] = d;
digest[4] = e;
digest[5] = f;
digest[6] = g;
digest[7] = h;
}
KERNEL_FQ void m21000_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* sha512
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = hc_swap32 (w0[0]);
w0_t[1] = hc_swap32 (w0[1]);
w0_t[2] = hc_swap32 (w0[2]);
w0_t[3] = hc_swap32 (w0[3]);
w1_t[0] = hc_swap32 (w1[0]);
w1_t[1] = hc_swap32 (w1[1]);
w1_t[2] = hc_swap32 (w1[2]);
w1_t[3] = hc_swap32 (w1[3]);
w2_t[0] = hc_swap32 (w2[0]);
w2_t[1] = hc_swap32 (w2[1]);
w2_t[2] = hc_swap32 (w2[2]);
w2_t[3] = hc_swap32 (w2[3]);
w3_t[0] = hc_swap32 (w3[0]);
w3_t[1] = hc_swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_m08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21000_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21000_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* sha512
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = hc_swap32 (w0[0]);
w0_t[1] = hc_swap32 (w0[1]);
w0_t[2] = hc_swap32 (w0[2]);
w0_t[3] = hc_swap32 (w0[3]);
w1_t[0] = hc_swap32 (w1[0]);
w1_t[1] = hc_swap32 (w1[1]);
w1_t[2] = hc_swap32 (w1[2]);
w1_t[3] = hc_swap32 (w1[3]);
w2_t[0] = hc_swap32 (w2[0]);
w2_t[1] = hc_swap32 (w2[1]);
w2_t[2] = hc_swap32 (w2[2]);
w2_t[3] = hc_swap32 (w2[3]);
w3_t[0] = hc_swap32 (w3[0]);
w3_t[1] = hc_swap32 (w3[1]);
w3_t[2] = 0;
w3_t[3] = pw_len * 8;
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_s08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21000_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,165 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha512.cl"
#endif
KERNEL_FQ void m21000_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
sha512_ctx_t ctx00;
sha512_init (&ctx00);
sha512_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha512_ctx_t ctx0 = ctx00;
sha512_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha512_final (&ctx0);
sha512_ctx_t ctx;
sha512_init (&ctx);
u32 final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update (&ctx, final, 64);
sha512_final (&ctx);
const u32 r0 = l32_from_64_S (ctx.h[7]);
const u32 r1 = h32_from_64_S (ctx.h[7]);
const u32 r2 = l32_from_64_S (ctx.h[3]);
const u32 r3 = h32_from_64_S (ctx.h[3]);
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
sha512_ctx_t ctx00;
sha512_init (&ctx00);
sha512_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha512_ctx_t ctx0 = ctx00;
sha512_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha512_final (&ctx0);
sha512_ctx_t ctx;
sha512_init (&ctx);
u32 final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update (&ctx, final, 64);
sha512_final (&ctx);
const u32 r0 = l32_from_64_S (ctx.h[7]);
const u32 r1 = h32_from_64_S (ctx.h[7]);
const u32 r2 = l32_from_64_S (ctx.h[3]);
const u32 r3 = h32_from_64_S (ctx.h[3]);
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,645 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha512.cl"
#endif
DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
{
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
}
#define ROUND_STEP(i) \
{ \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
}
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
digest[5] += f;
digest[6] += g;
digest[7] += h;
}
DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
{
u32x wend = 0x80000000;
u32x wlen = 64*8;
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = hl32_to_64 (w3[2], w3[3]);
u64x w8_t = hl32_to_64 (wend, 0);;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (0, wlen);
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \
w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \
w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \
w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \
w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \
w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \
w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \
w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \
w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \
wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \
wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \
wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \
wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \
}
#define ROUND_STEP(i) \
{ \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
}
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
digest[4] += e;
digest[5] += f;
digest[6] += g;
digest[7] += h;
*/
digest[0] = a;
digest[1] = b;
digest[2] = c;
digest[3] = d;
digest[4] = e;
digest[5] = f;
digest[6] = g;
digest[7] = h;
}
DECLSPEC void m21000m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
w0_t[2] = w[ 2];
w0_t[3] = w[ 3];
w1_t[0] = w[ 4];
w1_t[1] = w[ 5];
w1_t[2] = w[ 6];
w1_t[3] = w[ 7];
w2_t[0] = w[ 8];
w2_t[1] = w[ 9];
w2_t[2] = w[10];
w2_t[3] = w[11];
w3_t[0] = w[12];
w3_t[1] = w[13];
w3_t[2] = w[14];
w3_t[3] = w[15];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m21000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
w0_t[2] = w[ 2];
w0_t[3] = w[ 3];
w1_t[0] = w[ 4];
w1_t[1] = w[ 5];
w1_t[2] = w[ 6];
w1_t[3] = w[ 7];
w2_t[0] = w[ 8];
w2_t[1] = w[ 9];
w2_t[2] = w[10];
w2_t[3] = w[11];
w3_t[0] = w[12];
w3_t[1] = w[13];
w3_t[2] = w[14];
w3_t[3] = w[15];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest);
w0_t[0] = h32_from_64 (digest[0]);
w0_t[1] = l32_from_64 (digest[0]);
w0_t[2] = h32_from_64 (digest[1]);
w0_t[3] = l32_from_64 (digest[1]);
w1_t[0] = h32_from_64 (digest[2]);
w1_t[1] = l32_from_64 (digest[2]);
w1_t[2] = h32_from_64 (digest[3]);
w1_t[3] = l32_from_64 (digest[3]);
w2_t[0] = h32_from_64 (digest[4]);
w2_t[1] = l32_from_64 (digest[4]);
w2_t[2] = h32_from_64 (digest[5]);
w2_t[3] = l32_from_64 (digest[5]);
w3_t[0] = h32_from_64 (digest[6]);
w3_t[1] = l32_from_64 (digest[6]);
w3_t[2] = h32_from_64 (digest[7]);
w3_t[3] = l32_from_64 (digest[7]);
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
digest[2] = SHA512M_C;
digest[3] = SHA512M_D;
digest[4] = SHA512M_E;
digest[5] = SHA512M_F;
digest[6] = SHA512M_G;
digest[7] = SHA512M_H;
sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_m04 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = pws[gid].i[ 8];
w[ 9] = pws[gid].i[ 9];
w[10] = pws[gid].i[10];
w[11] = pws[gid].i[11];
w[12] = pws[gid].i[12];
w[13] = pws[gid].i[13];
w[14] = pws[gid].i[14];
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = pws[gid].i[ 8];
w[ 9] = pws[gid].i[ 9];
w[10] = pws[gid].i[10];
w[11] = pws[gid].i[11];
w[12] = pws[gid].i[12];
w[13] = pws[gid].i[13];
w[14] = pws[gid].i[14];
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}

@ -0,0 +1,191 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha512.cl"
#endif
KERNEL_FQ void m21000_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha512_ctx_vector_t ctx0;
sha512_init_vector (&ctx0);
sha512_update_vector (&ctx0, w, pw_len);
sha512_final_vector (&ctx0);
sha512_ctx_vector_t ctx;
sha512_init_vector (&ctx);
u32x final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update_vector (&ctx, final, 64);
sha512_final_vector (&ctx);
const u32x r0 = l32_from_64 (ctx.h[7]);
const u32x r1 = h32_from_64 (ctx.h[7]);
const u32x r2 = l32_from_64 (ctx.h[3]);
const u32x r3 = h32_from_64 (ctx.h[3]);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21000_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha512_ctx_vector_t ctx0;
sha512_init_vector (&ctx0);
sha512_update_vector (&ctx0, w, pw_len);
sha512_final_vector (&ctx0);
sha512_ctx_vector_t ctx;
sha512_init_vector (&ctx);
u32x final[32] = { 0 };
final[ 0] = h32_from_64_S (ctx0.h[0]);
final[ 1] = l32_from_64_S (ctx0.h[0]);
final[ 2] = h32_from_64_S (ctx0.h[1]);
final[ 3] = l32_from_64_S (ctx0.h[1]);
final[ 4] = h32_from_64_S (ctx0.h[2]);
final[ 5] = l32_from_64_S (ctx0.h[2]);
final[ 6] = h32_from_64_S (ctx0.h[3]);
final[ 7] = l32_from_64_S (ctx0.h[3]);
final[ 8] = h32_from_64_S (ctx0.h[4]);
final[ 9] = l32_from_64_S (ctx0.h[4]);
final[10] = h32_from_64_S (ctx0.h[5]);
final[11] = l32_from_64_S (ctx0.h[5]);
final[12] = h32_from_64_S (ctx0.h[6]);
final[13] = l32_from_64_S (ctx0.h[6]);
final[14] = h32_from_64_S (ctx0.h[7]);
final[15] = l32_from_64_S (ctx0.h[7]);
sha512_update_vector (&ctx, final, 64);
sha512_final_vector (&ctx);
const u32x r0 = l32_from_64 (ctx.h[7]);
const u32x r1 = h32_from_64 (ctx.h[7]);
const u32x r2 = l32_from_64 (ctx.h[3]);
const u32x r3 = h32_from_64 (ctx.h[3]);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,781 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21100_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] = pw_salt_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha1
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA1M_A;
b = SHA1M_B;
c = SHA1M_C;
d = SHA1M_D;
e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
COMPARE_M_SIMD (d, e, c, b);
}
}
KERNEL_FQ void m21100_m08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21100_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21100_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
const u32 e_rev = hc_rotl32_S (search[1], 2u);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len);
const u32x pw_salt_len = out_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] = pw_salt_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha1
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA1M_A;
b = SHA1M_B;
c = SHA1M_C;
d = SHA1M_D;
e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
if (MATCHES_NONE_VS (e, e_rev)) continue;
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
COMPARE_S_SIMD (d, e, c, b);
}
}
KERNEL_FQ void m21100_s08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21100_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,276 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21100_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_update (&ctx0, s, salt_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21100_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update (&ctx0, tmp.i, tmp.pw_len);
md5_update (&ctx0, s, salt_len);
md5_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,895 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21100_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] = pw_salt_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha1
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA1M_A;
b = SHA1M_B;
c = SHA1M_C;
d = SHA1M_D;
e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
COMPARE_M_SIMD (d, e, c, b);
}
}
KERNEL_FQ void m21100_m08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21100_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
const u32 e_rev = hc_rotl32_S (search[1], 2u);
/**
* salt
*/
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
const u32 salt_len = salt_bufs[salt_pos].salt_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* append salt
*/
u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3];
s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
const u32x pw_salt_len = pw_len + salt_len;
w0[0] |= s0[0];
w0[1] |= s0[1];
w0[2] |= s0[2];
w0[3] |= s0[3];
w1[0] |= s1[0];
w1[1] |= s1[1];
w1[2] |= s1[2];
w1[3] |= s1[3];
w2[0] |= s2[0];
w2[1] |= s2[1];
w2[2] |= s2[2];
w2[3] |= s2[3];
w3[0] |= s3[0];
w3[1] |= s3[1];
w3[2] = pw_salt_len * 8;
w3[3] = 0;
/**
* md5
*/
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
u32x e = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x t;
MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
/*
* sha1
*/
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
u32x w8_t = 0x80000000;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 32 * 8;
a = SHA1M_A;
b = SHA1M_B;
c = SHA1M_C;
d = SHA1M_D;
e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
if (MATCHES_NONE_VS (e, e_rev)) continue;
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
COMPARE_S_SIMD (d, e, c, b);
}
}
KERNEL_FQ void m21100_s08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21100_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,270 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21100_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_update (&ctx1, s, salt_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21100_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx0;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_update (&ctx1, s, salt_len);
md5_final (&ctx1);
const u32 a = ctx1.h[0];
const u32 b = ctx1.h[1];
const u32 c = ctx1.h[2];
const u32 d = ctx1.h[3];
sha1_ctx_t ctx;
sha1_init (&ctx);
w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_update_64 (&ctx, w0, w1, w2, w3, 32);
sha1_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,296 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21100_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_update_vector (&ctx0, s, salt_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha1_ctx_vector_t ctx;
sha1_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha1_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha1_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21100_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector (&ctx0, w, pw_len);
md5_update_vector (&ctx0, s, salt_len);
md5_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
sha1_ctx_vector_t ctx;
sha1_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
_w0[1] = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
_w0[2] = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
_w0[3] = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
_w1[0] = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
_w1[1] = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
_w1[2] = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
_w1[3] = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha1_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha1_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,360 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx1;
md5_init (&ctx1);
md5_update (&ctx1, tmp.i, tmp.pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
md5_ctx_t ctx1;
md5_init (&ctx1);
md5_update (&ctx1, tmp.i, tmp.pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,354 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx11;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx1 = ctx11;
md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,380 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,276 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21300_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
sha1_ctx_t ctx00;
sha1_init(&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_ctx_t ctx0 = ctx00;
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx = ctx11;
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21300_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
sha1_ctx_t ctx00;
sha1_init(&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_ctx_t ctx0 = ctx00;
sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx = ctx11;
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,266 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21300_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha1_ctx_t ctx0 = ctx00;
sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx = ctx11;
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21300_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha1_ctx_t ctx0 = ctx00;
sha1_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha1_final (&ctx0);
const u32 a = ctx0.h[0];
const u32 b = ctx0.h[1];
const u32 c = ctx0.h[2];
const u32 d = ctx0.h[3];
const u32 e = ctx0.h[4];
md5_ctx_t ctx = ctx11;
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
md5_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,304 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
KERNEL_FQ void m21300_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_ctx_vector_t ctx0;
sha1_init_vector_from_scalar (&ctx0, &ctx00);
sha1_update_vector_swap (&ctx0, w, pw_len);
sha1_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
md5_ctx_vector_t ctx;
md5_init_vector_from_scalar (&ctx, &ctx11);
_w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21300_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* bin2asc table
*/
LOCAL_VK u32 l_bin2asc[256];
for (u32 i = lid; i < 256; i += lsz)
{
const u32 i0 = (i >> 0) & 15;
const u32 i1 = (i >> 4) & 15;
l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
| ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
}
SYNC_THREADS ();
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
sha1_ctx_t ctx00;
sha1_init (&ctx00);
sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
md5_ctx_t ctx11;
md5_init (&ctx11);
md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_ctx_vector_t ctx0;
sha1_init_vector_from_scalar (&ctx0, &ctx00);
sha1_update_vector_swap (&ctx0, w, pw_len);
sha1_final_vector (&ctx0);
const u32x a = ctx0.h[0];
const u32x b = ctx0.h[1];
const u32x c = ctx0.h[2];
const u32x d = ctx0.h[3];
const u32x e = ctx0.h[4];
md5_ctx_vector_t ctx;
md5_init_vector_from_scalar (&ctx, &ctx11);
_w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0
| uint_to_hex_lower8 ((e >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
md5_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,574 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
KERNEL_FQ void m21400_m04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha256
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_len * 8;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m21400_m08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21400_m16 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21400_s04 (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x80_2x4_VV (w0, w1, out_len);
/**
* sha256
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = out_len * 8;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// start 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m21400_s08 (KERN_ATTR_RULES ())
{
}
KERNEL_FQ void m21400_s16 (KERN_ATTR_RULES ())
{
}

@ -0,0 +1,179 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha256.cl"
#endif
KERNEL_FQ void m21400_mxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = ctx0.h[0];
w0[1] = ctx0.h[1];
w0[2] = ctx0.h[2];
w0[3] = ctx0.h[3];
w1[0] = ctx0.h[4];
w1[1] = ctx0.h[5];
w1[2] = ctx0.h[6];
w1[3] = ctx0.h[7];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha256_update_64 (&ctx, w0, w1, w2, w3, 32);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21400_sxx (KERN_ATTR_RULES ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
COPY_PW (pws[gid]);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha256_ctx_t ctx0;
sha256_init (&ctx0);
sha256_update_swap (&ctx0, tmp.i, tmp.pw_len);
sha256_final (&ctx0);
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = ctx0.h[0];
w0[1] = ctx0.h[1];
w0[2] = ctx0.h[2];
w0[3] = ctx0.h[3];
w1[0] = ctx0.h[4];
w1[1] = ctx0.h[5];
w1[2] = ctx0.h[6];
w1[3] = ctx0.h[7];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha256_update_64 (&ctx, w0, w1, w2, w3, 32);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,688 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
KERNEL_FQ void m21400_m04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* sha256
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_len * 8;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m21400_m08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21400_m16 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21400_s04 (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len & 63;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
const u32x pw_len = (pw_l_len + pw_r_len) & 63;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* sha256
*/
u32x w0_t = hc_swap32 (w0[0]);
u32x w1_t = hc_swap32 (w0[1]);
u32x w2_t = hc_swap32 (w0[2]);
u32x w3_t = hc_swap32 (w0[3]);
u32x w4_t = hc_swap32 (w1[0]);
u32x w5_t = hc_swap32 (w1[1]);
u32x w6_t = hc_swap32 (w1[2]);
u32x w7_t = hc_swap32 (w1[3]);
u32x w8_t = hc_swap32 (w2[0]);
u32x w9_t = hc_swap32 (w2[1]);
u32x wa_t = hc_swap32 (w2[2]);
u32x wb_t = hc_swap32 (w2[3]);
u32x wc_t = hc_swap32 (w3[0]);
u32x wd_t = hc_swap32 (w3[1]);
u32x we_t = 0;
u32x wf_t = pw_len * 8;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// start 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m21400_s08 (KERN_ATTR_BASIC ())
{
}
KERNEL_FQ void m21400_s16 (KERN_ATTR_BASIC ())
{
}

@ -0,0 +1,173 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_sha256.cl"
#endif
KERNEL_FQ void m21400_mxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = ctx0.h[0];
w0[1] = ctx0.h[1];
w0[2] = ctx0.h[2];
w0[3] = ctx0.h[3];
w1[0] = ctx0.h[4];
w1[1] = ctx0.h[5];
w1[2] = ctx0.h[6];
w1[3] = ctx0.h[7];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha256_update_64 (&ctx, w0, w1, w2, w3, 32);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21400_sxx (KERN_ATTR_BASIC ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
sha256_ctx_t ctx1;
sha256_init (&ctx1);
sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
sha256_ctx_t ctx0 = ctx1;
sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
sha256_final (&ctx0);
sha256_ctx_t ctx;
sha256_init (&ctx);
w0[0] = ctx0.h[0];
w0[1] = ctx0.h[1];
w0[2] = ctx0.h[2];
w0[3] = ctx0.h[3];
w1[0] = ctx0.h[4];
w1[1] = ctx0.h[5];
w1[2] = ctx0.h[6];
w1[3] = ctx0.h[7];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha256_update_64 (&ctx, w0, w1, w2, w3, 32);
sha256_final (&ctx);
const u32 r0 = ctx.h[DGST_R0];
const u32 r1 = ctx.h[DGST_R1];
const u32 r2 = ctx.h[DGST_R2];
const u32 r3 = ctx.h[DGST_R3];
COMPARE_S_SCALAR (r0, r1, r2, r3);
}
}

@ -0,0 +1,728 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \
{ \
u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \
u32 t1 = a - t2; \
a = b; \
b = c; \
c = d; \
d = e - t1; \
e = f; \
f = g; \
g = h; \
h = 0; \
}
DECLSPEC void m21400m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_M_SIMD (d, h, c, g);
}
}
DECLSPEC void m21400s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* reverse
*/
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
u32 e_rev = digests_buf[digests_offset].digest_buf[4];
u32 f_rev = digests_buf[digests_offset].digest_buf[5];
u32 g_rev = digests_buf[digests_offset].digest_buf[6];
u32 h_rev = digests_buf[digests_offset].digest_buf[7];
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev);
/**
* loop
*/
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
// start 2nd round
/**
* sha256
*/
w0_t = a + SHA256M_A;
w1_t = b + SHA256M_B;
w2_t = c + SHA256M_C;
w3_t = d + SHA256M_D;
w4_t = e + SHA256M_E;
w5_t = f + SHA256M_F;
w6_t = g + SHA256M_G;
w7_t = h + SHA256M_H;
w8_t = 0x80000000;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 32 * 8;
a = SHA256M_A;
b = SHA256M_B;
c = SHA256M_C;
d = SHA256M_D;
e = SHA256M_E;
f = SHA256M_F;
g = SHA256M_G;
h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07);
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09);
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a);
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b);
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c);
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d);
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e);
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28);
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f);
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30);
w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31);
w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32);
w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33);
w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34);
w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35);
w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36);
w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37);
w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38);
if (MATCHES_NONE_VS (h, d_rev)) continue;
w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39);
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
COMPARE_S_SIMD (d, h, c, g);
}
}
KERNEL_FQ void m21400_m04 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21400_m08 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21400_m16 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = pws[gid].i[ 8];
w[ 9] = pws[gid].i[ 9];
w[10] = pws[gid].i[10];
w[11] = pws[gid].i[11];
w[12] = pws[gid].i[12];
w[13] = pws[gid].i[13];
w[14] = pws[gid].i[14];
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21400_s04 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21400_s08 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}
KERNEL_FQ void m21400_s16 (KERN_ATTR_VECTOR ())
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w[16];
w[ 0] = pws[gid].i[ 0];
w[ 1] = pws[gid].i[ 1];
w[ 2] = pws[gid].i[ 2];
w[ 3] = pws[gid].i[ 3];
w[ 4] = pws[gid].i[ 4];
w[ 5] = pws[gid].i[ 5];
w[ 6] = pws[gid].i[ 6];
w[ 7] = pws[gid].i[ 7];
w[ 8] = pws[gid].i[ 8];
w[ 9] = pws[gid].i[ 9];
w[10] = pws[gid].i[10];
w[11] = pws[gid].i[11];
w[12] = pws[gid].i[12];
w[13] = pws[gid].i[13];
w[14] = pws[gid].i[14];
w[15] = pws[gid].i[15];
const u32 pw_len = pws[gid].pw_len & 63;
/**
* main
*/
m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max);
}

@ -0,0 +1,199 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
KERNEL_FQ void m21400_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
_w0[0] = ctx0.h[0];
_w0[1] = ctx0.h[1];
_w0[2] = ctx0.h[2];
_w0[3] = ctx0.h[3];
_w1[0] = ctx0.h[4];
_w1[1] = ctx0.h[5];
_w1[2] = ctx0.h[6];
_w1[3] = ctx0.h[7];
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ void m21400_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha256_ctx_vector_t ctx0;
sha256_init_vector (&ctx0);
sha256_update_vector (&ctx0, w, pw_len);
sha256_final_vector (&ctx0);
sha256_ctx_vector_t ctx;
sha256_init_vector (&ctx);
_w0[0] = ctx0.h[0];
_w0[1] = ctx0.h[1];
_w0[2] = ctx0.h[2];
_w0[3] = ctx0.h[3];
_w1[0] = ctx0.h[4];
_w1[1] = ctx0.h[5];
_w1[2] = ctx0.h[6];
_w1[3] = ctx0.h[7];
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
sha256_final_vector (&ctx);
const u32x r0 = ctx.h[DGST_R0];
const u32x r1 = ctx.h[DGST_R1];
const u32x r2 = ctx.h[DGST_R2];
const u32x r3 = ctx.h[DGST_R3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

@ -0,0 +1,260 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha1.cl"
#include "inc_hash_sha512.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
typedef struct solarwinds_tmp
{
u32 ipad[5];
u32 opad[5];
u32 dgst[260];
u32 out[260];
} solarwinds_tmp_t;
typedef struct solarwinds
{
u32 salt_buf[64 + 1];
} solarwinds_t;
DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest)
{
digest[0] = ipad[0];
digest[1] = ipad[1];
digest[2] = ipad[2];
digest[3] = ipad[3];
digest[4] = ipad[4];
sha1_transform_vector (w0, w1, w2, w3, digest);
w0[0] = digest[0];
w0[1] = digest[1];
w0[2] = digest[2];
w0[3] = digest[3];
w1[0] = digest[4];
w1[1] = 0x80000000;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = (64 + 20) * 8;
digest[0] = opad[0];
digest[1] = opad[1];
digest[2] = opad[2];
digest[3] = opad[3];
digest[4] = opad[4];
sha1_transform_vector (w0, w1, w2, w3, digest);
}
KERNEL_FQ void m21500_init (KERN_ATTR_TMPS_ESALT (solarwinds_tmp_t, solarwinds_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
sha1_hmac_ctx_t sha1_hmac_ctx;
sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len);
tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0];
tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1];
tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2];
tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3];
tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4];
tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0];
tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1];
tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2];
tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3];
tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4];
sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
for (u32 i = 0, j = 1; i < 256; i += 5, j += 1)
{
sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = j;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4);
sha1_hmac_final (&sha1_hmac_ctx2);
tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0];
tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1];
tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2];
tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3];
tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4];
tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0];
tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
}
}
KERNEL_FQ void m21500_loop (KERN_ATTR_TMPS_ESALT (solarwinds_tmp_t, solarwinds_t))
{
const u64 gid = get_global_id (0);
if ((gid * VECT_SIZE) >= gid_max) return;
u32x ipad[5];
u32x opad[5];
ipad[0] = packv (tmps, ipad, gid, 0);
ipad[1] = packv (tmps, ipad, gid, 1);
ipad[2] = packv (tmps, ipad, gid, 2);
ipad[3] = packv (tmps, ipad, gid, 3);
ipad[4] = packv (tmps, ipad, gid, 4);
opad[0] = packv (tmps, opad, gid, 0);
opad[1] = packv (tmps, opad, gid, 1);
opad[2] = packv (tmps, opad, gid, 2);
opad[3] = packv (tmps, opad, gid, 3);
opad[4] = packv (tmps, opad, gid, 4);
for (u32 i = 0; i < 256; i += 5)
{
u32x dgst[5];
u32x out[5];
dgst[0] = packv (tmps, dgst, gid, i + 0);
dgst[1] = packv (tmps, dgst, gid, i + 1);
dgst[2] = packv (tmps, dgst, gid, i + 2);
dgst[3] = packv (tmps, dgst, gid, i + 3);
dgst[4] = packv (tmps, dgst, gid, i + 4);
out[0] = packv (tmps, out, gid, i + 0);
out[1] = packv (tmps, out, gid, i + 1);
out[2] = packv (tmps, out, gid, i + 2);
out[3] = packv (tmps, out, gid, i + 3);
out[4] = packv (tmps, out, gid, i + 4);
for (u32 j = 0; j < loop_cnt; j++)
{
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = dgst[0];
w0[1] = dgst[1];
w0[2] = dgst[2];
w0[3] = dgst[3];
w1[0] = dgst[4];
w1[1] = 0x80000000;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = (64 + 20) * 8;
hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst);
out[0] ^= dgst[0];
out[1] ^= dgst[1];
out[2] ^= dgst[2];
out[3] ^= dgst[3];
out[4] ^= dgst[4];
}
unpackv (tmps, dgst, gid, i + 0, dgst[0]);
unpackv (tmps, dgst, gid, i + 1, dgst[1]);
unpackv (tmps, dgst, gid, i + 2, dgst[2]);
unpackv (tmps, dgst, gid, i + 3, dgst[3]);
unpackv (tmps, dgst, gid, i + 4, dgst[4]);
unpackv (tmps, out, gid, i + 0, out[0]);
unpackv (tmps, out, gid, i + 1, out[1]);
unpackv (tmps, out, gid, i + 2, out[2]);
unpackv (tmps, out, gid, i + 3, out[3]);
unpackv (tmps, out, gid, i + 4, out[4]);
}
}
KERNEL_FQ void m21500_comp (KERN_ATTR_TMPS_ESALT (solarwinds_tmp_t, solarwinds_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
const u64 lid = get_local_id (0);
sha512_ctx_t ctx;
sha512_init (&ctx);
sha512_update_global (&ctx, tmps[gid].out, 1024);
sha512_final (&ctx);
const u32 r0 = l32_from_64_S (ctx.h[7]);
const u32 r1 = h32_from_64_S (ctx.h[7]);
const u32 r2 = l32_from_64_S (ctx.h[3]);
const u32 r3 = h32_from_64_S (ctx.h[3]);
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
2017-04-03 : Igor Pavlov : Public domain */
2018-12-31 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -19,7 +19,7 @@
{ MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); }
#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \
{ if ((size) == 0) p = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } }
{ if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } }
#define k7zMajorVersion 0
@ -666,7 +666,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc);
MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc);
MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc);
MY_ALLOC(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc);
MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc);
startBufPtr = sd.Data;
@ -1744,7 +1744,7 @@ size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex)
UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
{
Bool needSlash;
BoolInt needSlash;
if (!p->FileNameOffsets)
{
*(--dest) = 0;

@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
2017-04-03 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -44,7 +44,7 @@ typedef struct
const Byte *end;
const Byte *begin;
UInt64 processed;
Bool extra;
BoolInt extra;
SRes res;
const ILookInStream *inStream;
} CByteInToLook;
@ -156,7 +156,7 @@ static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, I
{
SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos;
ELzmaStatus status;
res = LzmaDec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status);
res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
@ -218,7 +218,7 @@ static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize,
{
SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos;
ELzmaStatus status;
res = Lzma2Dec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status);
res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
@ -269,7 +269,7 @@ static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer
return SZ_OK;
}
static Bool IS_MAIN_METHOD(UInt32 m)
static BoolInt IS_MAIN_METHOD(UInt32 m)
{
switch (m)
{
@ -286,7 +286,7 @@ static Bool IS_MAIN_METHOD(UInt32 m)
return False;
}
static Bool IS_SUPPORTED_CODER(const CSzCoderInfo *c)
static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
{
return
c->NumStreams == 1

@ -1,5 +1,5 @@
/* 7zTypes.h -- Basic types
2017-07-17 : Igor Pavlov : Public domain */
2018-08-04 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
@ -103,7 +103,8 @@ typedef UInt32 SizeT;
typedef size_t SizeT;
#endif
typedef int Bool;
typedef int BoolInt;
/* typedef BoolInt Bool; */
#define True 1
#define False 0

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 18
#define MY_VER_MINOR 05
#define MY_VER_MAJOR 19
#define MY_VER_MINOR 00
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "18.05"
#define MY_VERSION_NUMBERS "19.00"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@ -10,7 +10,7 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2018-04-30"
#define MY_DATE "2019-02-21"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"

@ -1,5 +1,5 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2017-04-28 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -52,7 +52,7 @@ void Bcj2Enc_Init(CBcj2Enc *p)
p->probs[i] = kBitModelTotal >> 1;
}
static Bool MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
{
if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
{
@ -165,7 +165,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
Bool needConvert;
BoolInt needConvert;
p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
p->ip += (UInt32)num + 1;
@ -253,7 +253,7 @@ void Bcj2Enc_Encode(CBcj2Enc *p)
{
const Byte *src = p->src;
const Byte *srcLim = p->srcLim;
unsigned finishMode = p->finishMode;
EBcj2Enc_FinishMode finishMode = p->finishMode;
p->src = p->temp;
p->srcLim = p->temp + p->tempPos;

@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
2016-02-25: Igor Pavlov : Public domain */
2018-02-18: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -115,7 +115,7 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
#endif
}
Bool x86cpuid_CheckAndRead(Cx86cpuid *p)
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
{
CHECK_CPUID_IS_SUPPORTED
MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
@ -144,7 +144,7 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
return -1;
}
Bool CPU_Is_InOrder()
BoolInt CPU_Is_InOrder()
{
Cx86cpuid p;
int firm;
@ -175,7 +175,7 @@ Bool CPU_Is_InOrder()
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <windows.h>
static Bool CPU_Sys_Is_SSE_Supported()
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
@ -188,7 +188,7 @@ static Bool CPU_Sys_Is_SSE_Supported()
#define CHECK_SYS_SSE_SUPPORT
#endif
Bool CPU_Is_Aes_Supported()
BoolInt CPU_Is_Aes_Supported()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
@ -197,4 +197,22 @@ Bool CPU_Is_Aes_Supported()
return (p.c >> 25) & 1;
}
BoolInt CPU_IsSupported_PageGB()
{
Cx86cpuid cpuid;
if (!x86cpuid_CheckAndRead(&cpuid))
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
if (d[0] < 0x80000001)
return False;
}
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
return (d[3] >> 26) & 1;
}
}
#endif

@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2017-09-04 : Igor Pavlov : Public domain */
2018-02-18 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@ -318,15 +318,16 @@ enum
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
Bool x86cpuid_CheckAndRead(Cx86cpuid *p);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
Bool CPU_Is_InOrder();
Bool CPU_Is_Aes_Supported();
BoolInt CPU_Is_InOrder();
BoolInt CPU_Is_Aes_Supported();
BoolInt CPU_IsSupported_PageGB();
#endif

@ -1,5 +1,5 @@
/* DllSecur.c -- DLL loading security
2016-10-04 : Igor Pavlov : Public domain */
2018-02-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -28,10 +28,31 @@ static const char * const g_Dlls =
"CRYPTBASE\0"
"OLEACC\0"
"CLBCATQ\0"
"VERSION\0"
;
#endif
void My_SetDefaultDllDirectories()
{
#ifndef UNDER_CE
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
GetVersionEx(&vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
#endif
}
void LoadSecurityDlls()
{
#ifndef UNDER_CE
@ -70,7 +91,7 @@ void LoadSecurityDlls()
for (;;)
{
char c = *dll++;
buf[pos + k] = c;
buf[pos + k] = (Byte)c;
k++;
if (c == 0)
break;

@ -1,5 +1,5 @@
/* DllSecur.h -- DLL loading for security
2016-06-08 : Igor Pavlov : Public domain */
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __DLL_SECUR_H
#define __DLL_SECUR_H
@ -10,6 +10,7 @@ EXTERN_C_BEGIN
#ifdef _WIN32
void My_SetDefaultDllDirectories();
void LoadSecurityDlls();
#endif

@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms
2017-06-10 : Igor Pavlov : Public domain */
2018-07-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -138,7 +138,7 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
void MatchFinder_Construct(CMatchFinder *p)
{
UInt32 i;
unsigned i;
p->bufferBase = NULL;
p->directInput = 0;
p->hash = NULL;
@ -147,7 +147,7 @@ void MatchFinder_Construct(CMatchFinder *p)
for (i = 0; i < 256; i++)
{
UInt32 r = i;
UInt32 r = (UInt32)i;
unsigned j;
for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
@ -368,6 +368,8 @@ static void MatchFinder_Normalize(CMatchFinder *p)
MatchFinder_ReduceOffsets(p, subValue);
}
MY_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
if (p->pos == kMaxValForNormalize)
@ -379,10 +381,16 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
MatchFinder_SetLimits(p);
}
static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
/*
(lenLimit > maxLen)
*/
MY_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *distances, UInt32 maxLen)
UInt32 *distances, unsigned maxLen)
{
/*
son[_cyclicBufferPos] = curMatch;
for (;;)
{
@ -400,7 +408,8 @@ static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos,
break;
if (maxLen < len)
{
*distances++ = maxLen = len;
maxLen = len;
*distances++ = len;
*distances++ = delta - 1;
if (len == lenLimit)
return distances;
@ -408,15 +417,58 @@ static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos,
}
}
}
*/
const Byte *lim = cur + lenLimit;
son[_cyclicBufferPos] = curMatch;
do
{
UInt32 delta = pos - curMatch;
if (delta >= _cyclicBufferSize)
break;
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
diff = (ptrdiff_t)0 - delta;
if (cur[maxLen] == cur[maxLen + diff])
{
const Byte *c = cur;
while (*c == c[diff])
{
if (++c == lim)
{
distances[0] = (UInt32)(lim - cur);
distances[1] = delta - 1;
return distances + 2;
}
}
{
unsigned len = (unsigned)(c - cur);
if (maxLen < len)
{
maxLen = len;
distances[0] = (UInt32)len;
distances[1] = delta - 1;
distances += 2;
}
}
}
}
}
while (--cutValue);
return distances;
}
MY_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *distances, UInt32 maxLen)
{
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
UInt32 len0 = 0, len1 = 0;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
for (;;)
{
UInt32 delta = pos - curMatch;
@ -426,9 +478,10 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
return distances;
}
{
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
UInt32 len = (len0 < len1 ? len0 : len1);
unsigned len = (len0 < len1 ? len0 : len1);
UInt32 pair0 = pair[0];
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@ -437,11 +490,12 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
break;
if (maxLen < len)
{
*distances++ = maxLen = len;
maxLen = (UInt32)len;
*distances++ = (UInt32)len;
*distances++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair[0];
*ptr1 = pair0;
*ptr0 = pair[1];
return distances;
}
@ -468,9 +522,9 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
UInt32 len0 = 0, len1 = 0;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
for (;;)
{
UInt32 delta = pos - curMatch;
@ -480,9 +534,9 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
return;
}
{
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
UInt32 len = (len0 < len1 ? len0 : len1);
unsigned len = (len0 < len1 ? len0 : len1);
if (pb[len] == cur[len])
{
while (++len != lenLimit)
@ -520,13 +574,13 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
p->buffer++; \
if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
#define MOVE_POS_RET MOVE_POS return offset;
#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
#define GET_MATCHES_HEADER2(minLen, ret_op) \
UInt32 lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer;
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
@ -535,22 +589,22 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define GET_MATCHES_FOOTER(offset, maxLen) \
offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, maxLen) - distances); MOVE_POS_RET;
offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
#define SKIP_FOOTER \
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
#define UPDATE_maxLen { \
ptrdiff_t diff = (ptrdiff_t)0 - d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (UInt32)(c - cur); }
maxLen = (unsigned)(c - cur); }
static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
unsigned offset;
GET_MATCHES_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
@ -561,7 +615,7 @@ static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
@ -572,7 +626,8 @@ UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, d2, maxLen, offset, pos;
UInt32 h2, d2, pos;
unsigned maxLen, offset;
UInt32 *hash;
GET_MATCHES_HEADER(3)
@ -594,12 +649,12 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
UPDATE_maxLen
distances[0] = maxLen;
distances[0] = (UInt32)maxLen;
distances[1] = d2 - 1;
offset = 2;
if (maxLen == lenLimit)
{
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
}
}
@ -609,7 +664,8 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, h3, d2, d3, maxLen, offset, pos;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen, offset;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@ -618,12 +674,12 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
d2 = pos - hash[ h2];
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
curMatch = (hash + kFix4HashSize)[hv];
hash[ h2] = pos;
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
@ -632,7 +688,8 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
distances[0] = maxLen = 2;
maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
}
@ -648,10 +705,10 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (offset != 0)
{
UPDATE_maxLen
distances[(size_t)offset - 2] = maxLen;
distances[(size_t)offset - 2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
}
}
@ -674,13 +731,13 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
d2 = pos - hash[ h2];
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash[ h2] = pos;
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
@ -741,7 +798,8 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, h3, d2, d3, maxLen, offset, pos;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen, offset;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@ -750,12 +808,11 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
d2 = pos - hash[ h2];
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
curMatch = (hash + kFix4HashSize)[hv];
hash[ h2] = pos;
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
@ -764,7 +821,8 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
distances[0] = maxLen = 2;
maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
}
@ -780,7 +838,7 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (offset != 0)
{
UPDATE_maxLen
distances[(size_t)offset - 2] = maxLen;
distances[(size_t)offset - 2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
@ -791,7 +849,7 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen < 3)
maxLen = 3;
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances + offset, maxLen) - (distances));
MOVE_POS_RET
}
@ -808,13 +866,13 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
d2 = pos - hash[ h2];
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash[ h2] = pos;
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
@ -877,12 +935,12 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 offset;
unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances, 2) - (distances));
MOVE_POS_RET
}
@ -940,7 +998,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HASH4_CALC;
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash[ h2] =
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
SKIP_FOOTER
@ -959,7 +1017,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HASH5_CALC;
hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv];
hash[ h2] =
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
@ -979,7 +1037,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HASH4_CALC;
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash[ h2] =
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
p->son[p->cyclicBufferPos] = curMatch;
@ -999,7 +1057,7 @@ static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HASH5_CALC;
hash = p->hash;
curMatch = hash + kFix5HashSize)[hv];
hash[ h2] =
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;

@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2017-06-10 : Igor Pavlov : Public domain */
2018-12-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -232,38 +232,57 @@ static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
#define kEmptyHashValue 0
/* #define MFMT_GM_INLINE */
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
#define NO_INLINE MY_FAST_CALL
/*
we use size_t for _cyclicBufferPos instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
static Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *_distances, UInt32 _maxLen, const UInt32 *hash, Int32 limit, UInt32 size, UInt32 *posRes)
MY_NO_INLINE
static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
{
do
{
UInt32 *distances = _distances + 1;
UInt32 curMatch = pos - *hash++;
UInt32 *_distances = ++distances;
UInt32 delta = *hash++;
CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
UInt32 len0 = 0, len1 = 0;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
UInt32 maxLen = _maxLen;
for (;;)
unsigned maxLen = (unsigned)_maxLen;
/*
if (size > 1)
{
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
UInt32 delta = *hash;
if (delta < _cyclicBufferSize)
{
*ptr0 = *ptr1 = kEmptyHashValue;
break;
UInt32 cyc1 = _cyclicBufferPos + 1;
CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
Byte b = *(cur + 1 - delta);
_distances[0] = pair[0];
_distances[1] = b;
}
}
*/
if (cutValue == 0 || delta >= _cyclicBufferSize)
{
*ptr0 = *ptr1 = kEmptyHashValue;
}
else
for(;;)
{
{
CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
UInt32 len = (len0 < len1 ? len0 : len1);
unsigned len = (len0 < len1 ? len0 : len1);
UInt32 pair0 = *pair;
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@ -272,54 +291,66 @@ static Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *
break;
if (maxLen < len)
{
*distances++ = maxLen = len;
maxLen = len;
*distances++ = (UInt32)len;
*distances++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair[0];
*ptr0 = pair[1];
UInt32 pair1 = pair[1];
*ptr1 = pair0;
*ptr0 = pair1;
break;
}
}
}
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
ptr1 = pair + 1;
curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
ptr0 = pair;
curMatch = *ptr0;
len0 = len;
UInt32 curMatch = pos - delta;
// delta = pos - *pair;
// delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
if (pb[len] < cur[len])
{
delta = pos - pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = pos - *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
}
}
if (--cutValue == 0 || delta >= _cyclicBufferSize)
{
*ptr0 = *ptr1 = kEmptyHashValue;
break;
}
}
pos++;
_cyclicBufferPos++;
cur++;
{
UInt32 num = (UInt32)(distances - _distances);
*_distances = num - 1;
_distances += num;
limit -= num;
_distances[-1] = num;
}
}
while (limit > 0 && --size != 0);
while (distances < limit && --size != 0);
*posRes = pos;
return limit;
return distances;
}
#endif
static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
distances[1] = p->hashNumAvail;
@ -369,8 +400,10 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
#else
{
UInt32 posRes;
curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos), size, &posRes);
curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
distances + limit,
size, &posRes) - distances);
p->hashBufPos += posRes - pos;
cyclicBufferPos += posRes - pos;
p->buffer += posRes - pos;

@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2017-04-03 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@ -19,10 +19,10 @@ EXTERN_C_BEGIN
typedef struct _CMtSync
{
Bool wasCreated;
Bool needStart;
Bool exit;
Bool stopWriting;
BoolInt wasCreated;
BoolInt needStart;
BoolInt exit;
BoolInt stopWriting;
CThread thread;
CAutoResetEvent canStart;
@ -30,8 +30,8 @@ typedef struct _CMtSync
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
Bool csWasInitialized;
Bool csWasEntered;
BoolInt csWasInitialized;
BoolInt csWasEntered;
CCriticalSection cs;
UInt32 numProcessedBlocks;
} CMtSync;

@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2018-02-19 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@ -169,7 +169,7 @@ static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT s
p->processedPos += (UInt32)size;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
@ -232,7 +232,7 @@ SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
if (p->state == LZMA2_STATE_DATA)
{
Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
LzmaDec_InitDicAndState(&p->decoder, initDic, False);
}
@ -254,8 +254,8 @@ SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
if (p->state == LZMA2_STATE_DATA)
{
Bool initDic = (p->control >= 0xE0);
Bool initState = (p->control >= 0xA0);
BoolInt initDic = (p->control >= 0xE0);
BoolInt initState = (p->control >= 0xA0);
LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
p->state = LZMA2_STATE_DATA_CONT;
}
@ -314,15 +314,15 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
while (p->state != LZMA2_STATE_ERROR)
{
if (p->state == LZMA2_STATE_FINISHED)
return LZMA_STATUS_FINISHED_WITH_MARK;
return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
if (outSize == 0 && !checkFinishBlock)
return LZMA_STATUS_NOT_FINISHED;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
{
if (*srcLen == inSize)
return LZMA_STATUS_NEEDS_MORE_INPUT;
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
(*srcLen)++;
p->state = Lzma2Dec_UpdateState(p, *src++);
@ -344,7 +344,7 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
// checkFinishBlock is true. So we expect that block must be finished,
// We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
// break;
return LZMA_STATUS_NOT_FINISHED;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
}
if (p->state == LZMA2_STATE_DATA)
@ -354,7 +354,7 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
}
if (outSize == 0)
return LZMA_STATUS_NOT_FINISHED;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
{
SizeT inCur = inSize - *srcLen;
@ -362,7 +362,7 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
if (inCur == 0)
return LZMA_STATUS_NEEDS_MORE_INPUT;
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
if (inCur > p->unpackSize)
inCur = p->unpackSize;
if (inCur > outSize)
@ -381,7 +381,7 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
if (inCur == 0)
{
if (p->packSize != 0)
return LZMA_STATUS_NEEDS_MORE_INPUT;
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
}
else if (p->state == LZMA2_STATE_DATA)
{
@ -418,7 +418,7 @@ ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
}
p->state = LZMA2_STATE_ERROR;
return LZMA_STATUS_NOT_SPECIFIED;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
}

@ -1,5 +1,5 @@
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
2018-03-02 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -94,13 +94,13 @@ typedef struct
ISeqOutStream *outStream;
ICompressProgress *progress;
Bool finishMode;
Bool outSize_Defined;
BoolInt finishMode;
BoolInt outSize_Defined;
UInt64 outSize;
UInt64 outProcessed;
UInt64 inProcessed;
Bool readWasFinished;
BoolInt readWasFinished;
SRes readRes;
Byte *inBuf;
@ -113,7 +113,7 @@ typedef struct
#ifndef _7ZIP_ST
UInt64 outProcessed_Parse;
Bool mtc_WasConstructed;
BoolInt mtc_WasConstructed;
CMtDec mtc;
CLzma2DecMtThread coders[MTDEC__THREADS_MAX];
#endif
@ -265,7 +265,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
t->outPreSize = 0;
// t->blockWasFinished = False;
// t->finishedWithMark = False;
t->parseStatus = LZMA_STATUS_NOT_SPECIFIED;
t->parseStatus = (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
t->state = MTDEC_PARSE_CONTINUE;
t->inCodeSize = 0;
@ -277,7 +277,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
{
ELzma2ParseStatus status;
Bool overflow;
BoolInt overflow;
UInt32 unpackRem = 0;
int checkFinishBlock = True;
@ -477,7 +477,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
{
ELzmaStatus status;
size_t srcProcessed = srcSize;
Bool blockWasFinished =
BoolInt blockWasFinished =
((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
|| t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK);
@ -526,15 +526,15 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
#define LZMA2DECMT_STREAM_WRITE_STEP (1 << 24)
static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
Bool needWriteToStream,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
Bool *needContinue, Bool *canRecode)
BoolInt *needContinue, BoolInt *canRecode)
{
CLzma2DecMt *me = (CLzma2DecMt *)pp;
const CLzma2DecMtThread *t = &me->coders[coderIndex];
size_t size = t->outCodeSize;
const Byte *data = t->outBuf;
Bool needContinue2 = True;
BoolInt needContinue2 = True;
PRF_STR_INT_2("Write", coderIndex, srcSize);
@ -633,7 +633,7 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p)
static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
#ifndef _7ZIP_ST
, Bool tMode
, BoolInt tMode
#endif
)
{
@ -674,8 +674,8 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
SRes res;
SizeT outProcessed;
Bool outFinished;
Bool needStop;
BoolInt outFinished;
BoolInt needStop;
if (inPos == inLim)
{
@ -810,7 +810,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
{
CLzma2DecMt *p = (CLzma2DecMt *)pp;
#ifndef _7ZIP_ST
Bool tMode;
BoolInt tMode;
#endif
*inProcessed = 0;
@ -903,7 +903,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
vt.Write = Lzma2DecMt_MtCallback_Write;
{
Bool needContinue = False;
BoolInt needContinue = False;
SRes res = MtDec_Code(&p->mtc);

@ -1,5 +1,5 @@
/* Lzma2Enc.c -- LZMA2 Encoder
2018-04-27 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -115,7 +115,7 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 k
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
@ -133,7 +133,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
size_t packSize = packSizeLimit;
UInt32 unpackSize = LZMA2_UNPACK_SIZE_MAX;
unsigned lzHeaderSize = 5 + (p->needInitProp ? 1 : 0);
Bool useCopyBlock;
BoolInt useCopyBlock;
SRes res;
*packSizeRes = 0;
@ -373,7 +373,7 @@ typedef struct
size_t outBufSize; /* size of allocated outBufs[i] */
size_t outBufsDataSizes[MTCODER__BLOCKS_MAX];
Bool mtCoder_WasConstructed;
BoolInt mtCoder_WasConstructed;
CMtCoder mtCoder;
Byte *outBufs[MTCODER__BLOCKS_MAX];

@ -1,5 +1,5 @@
/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder
2016-05-16 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -18,7 +18,7 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
{
size_t outSize2 = *destLen;
Byte *filteredStream;
Bool useFilter;
BoolInt useFilter;
int mainResult = SZ_ERROR_OUTPUT_EOF;
CLzmaEncProps props;
LzmaEncProps_Init(&props);
@ -56,7 +56,7 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
{
size_t minSize = 0;
Bool bestIsFiltered = False;
BoolInt bestIsFiltered = False;
/* passes for SZ_FILTER_AUTO:
0 - BCJ + LZMA
@ -71,7 +71,7 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE;
size_t outPropsSize = 5;
SRes curRes;
Bool curModeIsFiltered = (numPasses > 1 && i == numPasses - 1);
BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1);
if (curModeIsFiltered && !bestIsFiltered)
break;
if (useFilter && i == 0)

@ -1,13 +1,13 @@
/* LzmaDec.c -- LZMA Decoder
2018-02-28 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
/* #include "CpuArch.h" */
#include "LzmaDec.h"
#include <string.h>
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@ -19,7 +19,7 @@
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
@ -66,7 +66,7 @@
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
@ -539,7 +539,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
curLen = ((rem < len) ? (unsigned)rem : len);
pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
processedPos += curLen;
processedPos += (UInt32)curLen;
len -= curLen;
if (curLen <= dicBufSize - pos)
@ -547,7 +547,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
Byte *dest = dic + dicPos;
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
const Byte *lim = dest + curLen;
dicPos += curLen;
dicPos += (SizeT)curLen;
do
*(dest) = (Byte)*(dest + src);
while (++dest != lim);
@ -572,14 +572,14 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->buf = buf;
p->range = range;
p->code = code;
p->remainLen = len;
p->remainLen = (UInt32)len;
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
p->reps[1] = rep1;
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = state;
p->state = (UInt32)state;
return SZ_OK;
}
@ -601,8 +601,8 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += len;
p->remainLen -= len;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
while (len != 0)
{
len--;
@ -850,7 +850,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
{
p->remainLen = kMatchSpecLenStart + 1;
p->tempBufSize = 0;
@ -979,10 +979,10 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->tempBufSize = rem;
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
if (dummyRes == DUMMY_ERROR)
{
(*srcLen) += lookAhead;
(*srcLen) += (SizeT)lookAhead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
@ -1005,9 +1005,9 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
return SZ_ERROR_FAIL; /* some internal error */
lookAhead -= rem;
}
(*srcLen) += lookAhead;
(*srcLen) += (SizeT)lookAhead;
src += lookAhead;
inSize -= lookAhead;
inSize -= (SizeT)lookAhead;
p->tempBufSize = 0;
}
}

File diff suppressed because it is too large Load Diff

@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
2018-02-21 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -119,7 +119,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
unsigned bi;
SRes res;
SRes res2;
Bool finished;
BoolInt finished;
unsigned bufIndex;
size_t size;
const Byte *inData;
@ -294,7 +294,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
if (++wi >= mtc->numBlocksMax)
wi = 0;
{
Bool isReady;
BoolInt isReady;
CriticalSection_Enter(&mtc->cs);
@ -547,7 +547,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
const CMtCoderBlock *block = &p->blocks[bi];
unsigned bufIndex = block->bufIndex;
Bool finished = block->finished;
BoolInt finished = block->finished;
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;

@ -1,5 +1,5 @@
/* MtCoder.h -- Multi-thread Coder
2018-02-21 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __MT_CODER_H
#define __MT_CODER_H
@ -67,7 +67,7 @@ typedef struct
{
SRes res;
unsigned bufIndex;
Bool finished;
BoolInt finished;
} CMtCoderBlock;
@ -97,7 +97,7 @@ typedef struct _CMtCoder
CAutoResetEvent readEvent;
CSemaphore blocksSemaphore;
Bool stopReading;
BoolInt stopReading;
SRes readRes;
#ifdef MTCODER__USE_WRITE_THREAD

@ -1,5 +1,5 @@
/* MtDec.c -- Multi-thread Decoder
2018-03-02 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -88,12 +88,13 @@ static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
}
typedef struct
struct __CMtDecBufLink
{
void *next;
struct __CMtDecBufLink *next;
void *pad[3];
} CMtDecBufLink;
};
typedef struct __CMtDecBufLink CMtDecBufLink;
#define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink)
#define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET)
@ -197,7 +198,7 @@ static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
}
static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, Bool *wasInterrupted)
static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted)
{
SRes res;
CriticalSection_Enter(&p->mtProgress.cs);
@ -207,7 +208,7 @@ static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, Bool *wasInter
return res;
}
static SRes MtDec_Progress_GetError_Spec(CMtDec *p, UInt64 inSize, UInt64 outSize, UInt64 interruptIndex, Bool *wasInterrupted)
static SRes MtDec_Progress_GetError_Spec(CMtDec *p, UInt64 inSize, UInt64 outSize, UInt64 interruptIndex, BoolInt *wasInterrupted)
{
SRes res;
CriticalSection_Enter(&p->mtProgress.cs);
@ -271,9 +272,9 @@ static WRes ThreadFunc2(CMtDecThread *t)
for (;;)
{
SRes res, codeRes;
Bool wasInterrupted, isAllocError, overflow, finish;
BoolInt wasInterrupted, isAllocError, overflow, finish;
SRes threadingErrorSRes;
Bool needCode, needWrite, needContinue;
BoolInt needCode, needWrite, needContinue;
size_t inDataSize_Start;
UInt64 inDataSize;
@ -289,7 +290,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
Byte *afterEndData = NULL;
size_t afterEndData_Size = 0;
Bool canCreateNewThread = False;
BoolInt canCreateNewThread = False;
// CMtDecCallbackInfo parse;
CMtDecThread *nextThread;
@ -629,7 +630,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
if (res == SZ_OK && needCode && codeRes == SZ_OK)
{
Bool isStartBlock = True;
BoolInt isStartBlock = True;
CMtDecBufLink *link = (CMtDecBufLink *)t->inBuf;
for (;;)
@ -691,9 +692,9 @@ static WRes ThreadFunc2(CMtDecThread *t)
RINOK_THREAD(Event_Wait(&t->canWrite));
{
Bool isErrorMode = False;
Bool canRecode = True;
Bool needWriteToStream = needWrite;
BoolInt isErrorMode = False;
BoolInt canRecode = True;
BoolInt needWriteToStream = needWrite;
if (p->exitThread) return 0; // it's never executed in normal cases

@ -1,5 +1,5 @@
/* MtDec.h -- Multi-thread Decoder
2018-03-02 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
@ -76,7 +76,7 @@ typedef struct
// out
EMtDecParseState state;
Bool canCreateNewThread;
BoolInt canCreateNewThread;
UInt64 outPos; // check it (size_t)
} CMtDecCallbackInfo;
@ -107,11 +107,11 @@ typedef struct
if (*canRecode), we didn't flush current block data, so we still can decode current block later.
*/
SRes (*Write)(void *p, unsigned coderIndex,
Bool needWriteToStream,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
// int srcFinished,
Bool *needContinue,
Bool *canRecode);
BoolInt *needContinue,
BoolInt *canRecode);
} IMtDecCallback;
@ -140,22 +140,22 @@ typedef struct _CMtDec
size_t allocatedBufsSize;
Bool exitThread;
BoolInt exitThread;
WRes exitThreadWRes;
UInt64 blockIndex;
Bool isAllocError;
Bool overflow;
BoolInt isAllocError;
BoolInt overflow;
SRes threadingErrorSRes;
Bool needContinue;
BoolInt needContinue;
// CAutoResetEvent finishedEvent;
SRes readRes;
SRes codeRes;
Bool wasInterrupted;
BoolInt wasInterrupted;
unsigned numStartedThreads_Limit;
unsigned numStartedThreads;
@ -164,14 +164,14 @@ typedef struct _CMtDec
size_t crossStart;
size_t crossEnd;
UInt64 readProcessed;
Bool readWasFinished;
BoolInt readWasFinished;
UInt64 inProcessed;
unsigned filledThreadStart;
unsigned numFilledThreads;
#ifndef _7ZIP_ST
Bool needInterrupt;
BoolInt needInterrupt;
UInt64 interruptIndex;
CMtProgress mtProgress;
CMtDecThread threads[MTDEC__THREADS_MAX];

@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec
2017-04-03 : Igor Pavlov : Public domain
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@ -95,7 +95,7 @@ void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
p->Base = 0;
}
Bool Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
{
if (!p->Base || p->Size != size)
{
@ -342,7 +342,7 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
p->DummySee.Count = 64; /* unused */
}
static CTX_PTR CreateSuccessors(CPpmd7 *p, Bool skip)
static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
{
CPpmd_State upState;
CTX_PTR c = p->MinContext;

@ -1,5 +1,5 @@
/* Ppmd7.h -- PPMdH compression codec
2017-04-03 : Igor Pavlov : Public domain
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* This code supports virtual RangeDecoder and includes the implementation
@ -60,7 +60,7 @@ typedef struct
} CPpmd7;
void Ppmd7_Construct(CPpmd7 *p);
Bool Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
#define Ppmd7_WasAllocated(p) ((p)->Base != NULL)
@ -115,7 +115,7 @@ typedef struct
} CPpmd7z_RangeDec;
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);

@ -1,5 +1,5 @@
/* Ppmd7Dec.c -- PPMdH Decoder
2017-04-03 : Igor Pavlov : Public domain
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@ -8,7 +8,7 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
{
unsigned i;
p->Code = 0;

@ -1,5 +1,5 @@
/* 7zMain.c - Test application for 7z Decoder
2018-04-19 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -176,7 +176,7 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s
char defaultChar = '_';
BOOL defUsed;
unsigned numChars = 0;
numChars = WideCharToMultiByte(codePage, 0, s, len, (char *)buf->data, size, &defaultChar, &defUsed);
numChars = WideCharToMultiByte(codePage, 0, (LPCWSTR)s, len, (char *)buf->data, size, &defaultChar, &defUsed);
if (numChars == 0 || numChars >= size)
return SZ_ERROR_FAIL;
buf->data[numChars] = 0;
@ -202,7 +202,7 @@ static WRes MyCreateDir(const UInt16 *name)
{
#ifdef USE_WINDOWS_FILE
return CreateDirectoryW(name, NULL) ? 0 : GetLastError();
return CreateDirectoryW((LPCWSTR)name, NULL) ? 0 : GetLastError();
#else
@ -227,7 +227,7 @@ static WRes MyCreateDir(const UInt16 *name)
static WRes OutFile_OpenUtf16(CSzFile *p, const UInt16 *name)
{
#ifdef USE_WINDOWS_FILE
return OutFile_OpenW(p, name);
return OutFile_OpenW(p, (LPCWSTR)name);
#else
CBuf buf;
WRes res;
@ -354,7 +354,7 @@ static void PrintError(char *s)
PrintLF();
}
static void GetAttribString(UInt32 wa, Bool isDir, char *s)
static void GetAttribString(UInt32 wa, BoolInt isDir, char *s)
{
#ifdef USE_WINDOWS_FILE
s[0] = (char)(((wa & FILE_ATTRIBUTE_DIRECTORY) != 0 || isDir) ? 'D' : '.');
@ -430,7 +430,7 @@ int MY_CDECL main(int numargs, char *args[])
res = SZ_OK;
{
lookStream.buf = ISzAlloc_Alloc(&allocImp, kInputBufSize);
lookStream.buf = (Byte *)ISzAlloc_Alloc(&allocImp, kInputBufSize);
if (!lookStream.buf)
res = SZ_ERROR_MEM;
else
@ -647,7 +647,7 @@ int MY_CDECL main(int numargs, char *args[])
We remove posix bits, if we detect posix mode field */
if ((attrib & 0xF0000000) != 0)
attrib &= 0x7FFF;
SetFileAttributesW(destPath, attrib);
SetFileAttributesW((LPCWSTR)destPath, attrib);
}
#endif
}

@ -1,5 +1,5 @@
/* LzmaUtil.c -- Test application for LZMA compression
2017-04-27 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "../../Precomp.h"
@ -177,7 +177,7 @@ static int main2(int numArgs, const char *args[], char *rs)
char c;
int res;
int encodeMode;
Bool useOutFile = False;
BoolInt useOutFile = False;
FileSeqInStream_CreateVTable(&inStream);
File_Construct(&inStream.file);

@ -1,5 +1,5 @@
/* SfxSetup.c - 7z SFX Setup
2017-04-04 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -127,7 +127,7 @@ static WRes MyCreateDir(const WCHAR *name)
#define kSignatureSearchLimit (1 << 22)
static Bool FindSignature(CSzFile *stream, UInt64 *resPos)
static BoolInt FindSignature(CSzFile *stream, UInt64 *resPos)
{
Byte buf[kBufferSize];
size_t numPrevBytes = 0;
@ -163,7 +163,7 @@ static Bool FindSignature(CSzFile *stream, UInt64 *resPos)
}
}
static Bool DoesFileOrDirExist(const WCHAR *path)
static BoolInt DoesFileOrDirExist(const WCHAR *path)
{
WIN32_FIND_DATAW fd;
HANDLE handle;
@ -254,7 +254,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
DWORD winRes;
const wchar_t *cmdLineParams;
const char *errorMessage = NULL;
Bool useShellExecute = True;
BoolInt useShellExecute = True;
DWORD exitCode = 0;
LoadSecurityDlls();
@ -287,7 +287,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
cmdLineParams = GetCommandLineW();
#ifndef UNDER_CE
{
Bool quoteMode = False;
BoolInt quoteMode = False;
for (;; cmdLineParams++)
{
wchar_t c = *cmdLineParams;
@ -379,7 +379,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
if (res == SZ_OK)
{
lookStream.buf = ISzAlloc_Alloc(&allocImp, kInputBufSize);
lookStream.buf = (Byte *)ISzAlloc_Alloc(&allocImp, kInputBufSize);
if (!lookStream.buf)
res = SZ_ERROR_MEM;
else
@ -420,7 +420,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
temp = path + pathLen;
SzArEx_GetFileNameUtf16(&db, i, temp);
SzArEx_GetFileNameUtf16(&db, i, (UInt16 *)temp);
{
res = SzArEx_Extract(&db, &lookStream.vt, i,
&blockIndex, &outBuffer, &outBufferSize,
@ -527,7 +527,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
{
WCHAR *temp = path + pathLen;
UInt32 j;
SzArEx_GetFileNameUtf16(&db, executeFileIndex, temp);
SzArEx_GetFileNameUtf16(&db, executeFileIndex, (UInt16 *)temp);
for (j = 0; temp[j] != 0; j++)
if (temp[j] == '/')
temp[j] = CHAR_PATH_SEPARATOR;

@ -1,5 +1,5 @@
/* Xz.h - Xz interface
2018-02-28 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __XZ_H
#define __XZ_H
@ -53,7 +53,7 @@ typedef struct
#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
SRes XzBlock_Parse(CXzBlock *p, const Byte *header);
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, Bool *isIndex, UInt32 *headerSizeRes);
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
/* ---------- xz stream ---------- */
@ -186,10 +186,10 @@ typedef struct
Byte *outBuf;
size_t outBufSize;
size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode)
Bool wasFinished;
BoolInt wasFinished;
SRes res;
ECoderStatus status;
// Bool SingleBufMode;
// BoolInt SingleBufMode;
int finished[MIXCODER_NUM_FILTERS_MAX - 1];
size_t pos[MIXCODER_NUM_FILTERS_MAX - 1];
@ -241,9 +241,9 @@ typedef struct
CXzCheck check;
CSha256 sha;
Bool parseMode;
Bool headerParsedOk;
Bool decodeToStreamSignature;
BoolInt parseMode;
BoolInt headerParsedOk;
BoolInt decodeToStreamSignature;
unsigned decodeOnlyOneBlock;
Byte *outBuf;
@ -335,7 +335,7 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen,
ECoderFinishMode finishMode, ECoderStatus *status);
Bool XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
/*
XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes,
@ -365,7 +365,7 @@ UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p);
*/
void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p);
Bool XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
#define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags))
@ -378,7 +378,7 @@ typedef struct
{
size_t inBufSize_ST;
size_t outStep_ST;
Bool ignoreErrors;
BoolInt ignoreErrors;
#ifndef _7ZIP_ST
unsigned numThreads;

@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
2018-04-24 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -245,7 +245,7 @@ SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocP
CBraState *decoder;
if (id < XZ_ID_Delta || id > XZ_ID_SPARC)
return SZ_ERROR_UNSUPPORTED;
decoder = p->p;
decoder = (CBraState *)p->p;
if (!decoder)
{
decoder = (CBraState *)ISzAlloc_Alloc(alloc, sizeof(CBraState));
@ -341,7 +341,7 @@ static SRes SbState_SetFromMethod(IStateCoder *p, ISzAllocPtr alloc)
typedef struct
{
CLzma2Dec decoder;
Bool outBufMode;
BoolInt outBufMode;
} CLzma2Dec_Spec;
@ -400,7 +400,7 @@ static SRes Lzma2State_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *s
res = Lzma2Dec_DecodeToBuf(&spec->decoder, dest, destLen, src, srcLen, (ELzmaFinishMode)finishMode, &status2);
// *wasFinished = (status2 == LZMA_STATUS_FINISHED_WITH_MARK);
// ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder
*status = status2;
*status = (ECoderStatus)status2;
return res;
}
@ -637,8 +637,8 @@ static SRes MixCoder_Code(CMixCoder *p,
for (;;)
{
Bool processed = False;
Bool allFinished = True;
BoolInt processed = False;
BoolInt allFinished = True;
SRes resMain = SZ_OK;
unsigned i;
@ -761,7 +761,7 @@ SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf)
return XzFlags_IsSupported(*p) ? SZ_OK : SZ_ERROR_UNSUPPORTED;
}
static Bool Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf)
static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf)
{
return indexSize == (((UInt64)GetUi32(buf + 4) + 1) << 2)
&& GetUi32(buf) == CrcCalc(buf + 4, 6)
@ -775,7 +775,7 @@ static Bool Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *b
if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
static Bool XzBlock_AreSupportedFilters(const CXzBlock *p)
static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
{
unsigned numFilters = XzBlock_GetNumFilters(p) - 1;
unsigned i;
@ -866,7 +866,7 @@ SRes XzBlock_Parse(CXzBlock *p, const Byte *header)
static SRes XzDecMix_Init(CMixCoder *p, const CXzBlock *block, Byte *outBuf, size_t outBufSize)
{
unsigned i;
Bool needReInit = True;
BoolInt needReInit = True;
unsigned numFilters = XzBlock_GetNumFilters(block);
if (numFilters == p->numCoders && ((p->outBuf && outBuf) || (!p->outBuf && !outBuf)))
@ -999,8 +999,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
SRes res;
ECoderFinishMode finishMode2 = finishMode;
Bool srcFinished2 = srcFinished;
Bool destFinish = False;
BoolInt srcFinished2 = srcFinished;
BoolInt destFinish = False;
if (p->block.packSize != (UInt64)(Int64)-1)
{
@ -1346,12 +1346,12 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
Bool XzUnpacker_IsBlockFinished(const CXzUnpacker *p)
BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p)
{
return (p->state == XZ_STATE_BLOCK_HEADER) && (p->pos == 0);
}
Bool XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p)
BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p)
{
return (p->state == XZ_STATE_STREAM_PADDING) && (((UInt32)p->padSize & 3) == 0);
}
@ -1423,18 +1423,18 @@ typedef struct
size_t outCodeSize;
ECoderStatus status;
SRes codeRes;
Bool skipMode;
// Bool finishedWithMark;
BoolInt skipMode;
// BoolInt finishedWithMark;
EMtDecParseState parseState;
Bool parsing_Truncated;
Bool atBlockHeader;
BoolInt parsing_Truncated;
BoolInt atBlockHeader;
CXzStreamFlags streamFlags;
// UInt64 numFinishedStreams
UInt64 numStreams;
UInt64 numTotalBlocks;
UInt64 numBlocks;
Bool dec_created;
BoolInt dec_created;
CXzUnpacker dec;
Byte mtPad[1 << 7];
@ -1458,14 +1458,14 @@ typedef struct
ICompressProgress *progress;
// CXzStatInfo *stat;
Bool finishMode;
Bool outSize_Defined;
BoolInt finishMode;
BoolInt outSize_Defined;
UInt64 outSize;
UInt64 outProcessed;
UInt64 inProcessed;
UInt64 readProcessed;
Bool readWasFinished;
BoolInt readWasFinished;
SRes readRes;
SRes writeRes;
@ -1473,14 +1473,14 @@ typedef struct
size_t outBufSize;
Byte *inBuf;
size_t inBufSize;
Bool dec_created;
CXzUnpacker dec;
ECoderStatus status;
SRes codeRes;
#ifndef _7ZIP_ST
Bool mainDecoderWasCalled;
BoolInt mainDecoderWasCalled;
// int statErrorDefined;
int finishedDecoderIndex;
@ -1494,12 +1494,12 @@ typedef struct
// UInt64 numBadBlocks;
SRes mainErrorCode;
Bool isBlockHeaderState_Parse;
Bool isBlockHeaderState_Write;
BoolInt isBlockHeaderState_Parse;
BoolInt isBlockHeaderState_Write;
UInt64 outProcessed_Parse;
Bool parsing_Truncated;
BoolInt parsing_Truncated;
Bool mtc_WasConstructed;
BoolInt mtc_WasConstructed;
CMtDec mtc;
CXzDecMtThread coders[MTDEC__THREADS_MAX];
#endif
@ -1525,7 +1525,8 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
p->outBufSize = 0;
p->inBuf = NULL;
p->inBufSize = 0;
p->dec_created = False;
XzUnpacker_Construct(&p->dec, &p->alignOffsetAlloc.vt);
p->unpackBlockMaxSize = 0;
@ -1573,11 +1574,7 @@ static void XzDecMt_FreeOutBufs(CXzDecMt *p)
static void XzDecMt_FreeSt(CXzDecMt *p)
{
if (p->dec_created)
{
XzUnpacker_Free(&p->dec);
p->dec_created = False;
}
XzUnpacker_Free(&p->dec);
if (p->outBuf)
{
@ -1968,11 +1965,11 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
#define XZDECMT_STREAM_WRITE_STEP (1 << 24)
static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
Bool needWriteToStream,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
// int srcFinished,
Bool *needContinue,
Bool *canRecode)
BoolInt *needContinue,
BoolInt *canRecode)
{
CXzDecMt *me = (CXzDecMt *)pp;
const CXzDecMtThread *coder = &me->coders[coderIndex];
@ -2302,7 +2299,7 @@ void XzStatInfo_Clear(CXzStatInfo *p)
static SRes XzDecMt_Decode_ST(CXzDecMt *p
#ifndef _7ZIP_ST
, Bool tMode
, BoolInt tMode
#endif
, CXzStatInfo *stat)
{
@ -2358,7 +2355,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
for (;;)
{
SizeT outSize;
Bool finished;
BoolInt finished;
ECoderFinishMode finishMode;
SizeT inProcessed;
ECoderStatus status;
@ -2466,7 +2463,7 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
int finishMode,
UInt64 readProcessed, UInt64 inProcessed,
SRes res, ECoderStatus status,
Bool decodingTruncated,
BoolInt decodingTruncated,
CXzStatInfo *stat)
{
UInt64 extraSize;
@ -2531,7 +2528,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
{
CXzDecMt *p = (CXzDecMt *)pp;
#ifndef _7ZIP_ST
Bool tMode;
BoolInt tMode;
#endif
XzStatInfo_Clear(stat);
@ -2564,13 +2561,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
p->codeRes = 0;
p->status = CODER_STATUS_NOT_SPECIFIED;
if (!p->dec_created)
{
XzUnpacker_Construct(&p->dec, &p->alignOffsetAlloc.vt);
p->dec_created = True;
}
XzUnpacker_Init(&p->dec);
*isMT = False;
@ -2600,6 +2591,8 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
{
IMtDecCallback vt;
// we just free ST buffers here
// but we still keep state variables, that was set in XzUnpacker_Init()
XzDecMt_FreeSt(p);
p->outProcessed_Parse = 0;
@ -2636,7 +2629,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
vt.Write = XzDecMt_Callback_Write;
{
Bool needContinue;
BoolInt needContinue;
SRes res = MtDec_Code(&p->mtc);
@ -2665,7 +2658,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
if (!needContinue)
{
SRes codeRes;
Bool truncated = False;
BoolInt truncated = False;
ECoderStatus status;
CXzUnpacker *dec;

@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
2018-04-28 : Igor Pavlov : Public domain */
2019-02-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -366,7 +366,7 @@ static SRes SeqInFilter_Read(const ISeqInStream *pp, void *data, size_t *size)
SRes res;
*size = sizeOriginal;
res = p->StateCoder.Code2(p->StateCoder.p,
data, size,
(Byte *)data, size,
p->buf + p->curPos, &srcLen,
p->srcWasFinished, CODER_FINISH_ANY,
&status);
@ -814,7 +814,7 @@ static SRes Xz_CompressBlock(
SRes res;
Byte *outBuf = NULL;
size_t outSize = 0;
Bool useStream = (fp || inStream);
BoolInt useStream = (fp || inStream);
// useStream = True;
if (!useStream)
@ -940,7 +940,7 @@ typedef struct
#ifndef _7ZIP_ST
unsigned checkType;
ISeqOutStream *outStream;
Bool mtCoder_WasConstructed;
BoolInt mtCoder_WasConstructed;
CMtCoder mtCoder;
CXzEncBlockInfo EncBlocks[MTCODER__BLOCKS_MAX];
#endif

@ -1,5 +1,5 @@
/* XzIn.c - Xz input
2018-02-02 : Igor Pavlov : Public domain */
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -28,7 +28,7 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream)
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, Bool *isIndex, UInt32 *headerSizeRes)
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{
Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
unsigned headerSize;

@ -137,19 +137,23 @@ typedef struct _cl_image_desc {
size_t image_slice_pitch;
cl_uint num_mip_levels;
cl_uint num_samples;
#ifdef CL_VERSION_2_0
#ifdef __GNUC__
__extension__ /* Prevents warnings about anonymous union in -pedantic builds */
#endif
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( push )
#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */
#endif
union {
#endif
cl_mem buffer;
#ifdef CL_VERSION_2_0
cl_mem mem_object;
};
#ifdef _MSC_VER
#pragma warning( pop )
#pragma warning( pop )
#endif
#endif
} cl_image_desc;

@ -1,68 +1,95 @@
* changes v5.1.0 -> v5.x.x
* changes v5.1.0 -> v6.0.0
##
## Feature
##
- Fully modularized hash-mode integration via plugin interface and converted all existing hash-modes
- Refactor hashcat backend interface to allow adding compute API other than OpenCL
- Added CUDA as a new compute API to hashcat backend (enables hashcat to run on NVIDIA Jetson or IBM POWER9)
- Support use of all available GPU memory using CUDA backend
- Support on-the-fly loading of compressed wordlists in zip and gzip format
- Support for inline VeraCrypt PIM Brute-Force
- Support Deflate decompression for the 7-Zip hash type using zlib
- Support deflate decompression for the 7-Zip hash-mode using zlib hook
- Added documentation on hashcat brain, slow-candidate and keyboard-layout mapping features
##
## Algorithms
##
- Added hash-mode: Open Document Format (ODF) 1.2 (SHA-256, AES)
- Added hash-mode: Open Document Format (ODF) 1.1 (SHA-1, Blowfish)
- Added hash-mode: Java Object hashCode()
- Added hash-mode: Android Backup
- Added hash-mode: AuthMe sha256
- Added hash-mode: BitShares v0.x
- Added hash-mode: Blockchain, My Wallet, Second Password (SHA256)
- Added hash-mode: DiskCryptor
- Added hash-mode: Electrum Wallet (Salt-Type 3)
- Added hash-mode: Android Backup
- Added hash-mode: QNX /etc/shadow (MD5)
- Added hash-mode: QNX /etc/shadow (SHA256)
- Added hash-mode: QNX /etc/shadow (SHA512)
- Added hash-mode: Kerberos 5 TGS-REP etype 17 (AES128-CTS-HMAC-SHA1-96)
- Added hash-mode: Kerberos 5 TGS-REP etype 18 (AES256-CTS-HMAC-SHA1-96)
- Added hash-mode: Huawei Router sha1(md5($pass).$salt)
- Added hash-mode: Java Object hashCode()
- Added hash-mode: Kerberos 5 Pre-Auth etype 17 (AES128-CTS-HMAC-SHA1-96)
- Added hash-mode: Kerberos 5 Pre-Auth etype 18 (AES256-CTS-HMAC-SHA1-96)
- Added hash-mode: sha1(md5(md5($pass)))
- Added hash-mode: sha1($salt1.$pass.$salt2)
- Added hash-mode: Ruby on Rails Restful-Authentication
- Added hash-mode: Kerberos 5 TGS-REP etype 17 (AES128-CTS-HMAC-SHA1-96)
- Added hash-mode: Kerberos 5 TGS-REP etype 18 (AES256-CTS-HMAC-SHA1-96)
- Added hash-mode: Open Document Format (ODF) 1.1 (SHA-1, Blowfish)
- Added hash-mode: Open Document Format (ODF) 1.2 (SHA-256, AES)
- Added hash-mode: Oracle Transportation Management (SHA256)
- Added hash-mode: PKZIP archive encryption
- Added hash-mode: PKZIP Master Key
- Added hash-mode: DiskCryptor
- Added hash-mode: Python passlib pbkdf2-sha1
- Added hash-mode: Python passlib pbkdf2-sha256
- Added hash-mode: Python passlib pbkdf2-sha512
- Added hash-mode: Oracle Transportation Management (SHA256)
- Added hash-mode: QNX /etc/shadow (MD5)
- Added hash-mode: QNX /etc/shadow (SHA256)
- Added hash-mode: QNX /etc/shadow (SHA512)
- Added hash-mode: Ruby on Rails Restful-Authentication
- Added hash-mode: SolarWinds Orion
- Added hash-mode: md5($salt.sha1($salt.$pass))
- Added hash-mode: md5(sha1($pass).md5($pass).sha1($pass))
- Added hash-mode: md5(sha1($salt).md5($pass))
- Added hash-mode: sha1(md5(md5($pass)))
- Added hash-mode: sha1(md5($pass.$salt))
- Added hash-mode: sha1(md5($pass).$salt)
- Added hash-mode: sha1($salt1.$pass.$salt2)
- Added hash-mode: sha256(md5($pass))
- Added hash-mode: sha256(sha256_bin(pass))
- Added hash-mode: sha256(sha256($pass).$salt)
##
## Bugs
##
- Fixed buffer overflow in build_plain() function
- Fixed copy/paste error leading to invalid "Integer overflow detected in keyspace of mask" in attack-mode 6 and 7
- Fixed cracking of Cisco-PIX and Cisco-ASA MD5 passwords in mask-attack mode if mask > length 16
- Fixed cracking of Electrum Wallet Salt-Type 2 hashes
- Fixed cracking of NetNTLMv1 passwords in mask-attack mode if mask > length 16 (optimized kernels only)
- Fixed cracking raw Streebog-HMAC 256 and 512 hashes with password of length >= 64
- Fixed cracking raw Whirlpool hashes cracking with password of length >= 32
- Fixed incorrect progress-only result in a special race condition
- Fixed invalid call of mp_css_utf16le_expand()/mp_css_utf16be_expand() in a slow-candidate session
- Fixed invalid password truncation in attack-mode 1 if final password is longer than 32 character
- Fixed maximum password length limit which was announced as 256 but actually was 255
- Fixed output of IKE PSK (mode 5300 and 5400) hashes to have separators at right position
- Fixed output password of "e" rule in pure and cpu rule engine if separator character is also the first letter
- Fixed problem with the usage of the hexadecimal notations (\x00-\xff) within rules
- Fixed race condition in maskfile mode by using a dedicated flag for restore execution
- Fixed some memory leaks in case hashcat is shutting down due to some file error
- Fixed some memory leaks in case mask-files are used in optimized mode
- Fixed the 7-Zip parser to allow the entire supported range of encrypted and decrypted data lengths
- Fixed the validation of the --brain-client-features command line argument (only values 1, 2 or 3 are allowed)
- Fixed problem with the usage of the hexadecimal notations (\x00-\xff) within rules
##
## Improvements
##
- Startup Checks: Improved the pidfile check: Do not just check for existing PID but also check executable filename
- Cracking bcrypt and Password Safe v2: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
- Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key
- Building: Fix for library compilation failure due to multiple defenition of sbob_xx64()
- Building: Updated BUILD.md
- Cracking bcrypt and Password Safe v2: Use a feedback from the compute API backend to dynamically find out optimal thread count
- Documents: Added README on how to build hashcat on MSYS2
- Filehandling: Print a truncation warning in case an oversized line was detected
- My Wallet: Added additional plaintext pattern used in newer versions
- OpenCL Runtime: Disable OpenCL kernel cache on Apple for Intel CPU (throws CL_BUILD_PROGRAM_FAILURE for no reason)
- OpenCL Runtime: Do not run a shared- and constant-memory size check if their memory type is of type global memory (typically CPU)
- OpenCL Runtime: Improve ROCM detection and make sure to not confuse with recent AMDGPU drivers
- OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults
- OpenCL Runtime: Unlocked maximum thread count
@ -71,25 +98,44 @@
- OpenCL Runtime: Workaround JiT compiler error on ROCM 2.3 driver if the 'inline' keyword is used in function declaration
- OpenCL Runtime: Workaround memory allocation error on AMD driver on Windows leading to CL_MEM_OBJECT_ALLOCATION_FAILURE
- OpenCL Runtime: Workaround ROCm OpenCL driver problem trying to write temporary file into readonly folder by setting TMPDIR
- OpenCL Runtime: Do not run a shared- and constant-memory size check if their memory type is of type global memory (typically CPU)
- Startup Checks: Improved the pidfile check: Do not just check for existing PID but also check executable filename
- Startup Screen: Add extra warning when using --force
- Startup Screen: Provide an estimate of host memory requirements for the requested attack
- Status Screen: Added brain status for all devices
- Status Screen: Added remaining counts and changed recovered count logic
- Status Screen: Added --status-json flag for easier machine reading of hashcat status output
- Tab Completion: Allow using "make install" version of hashcat
- Tuning Database: Updated hashcat.hctune with new models and refreshed vector width values
- WPA/WPA2 cracking: In the potfile, replace password with PMK in order to detect already cracked networks across all WPA modes
- WipZip cracking: Added two byte early reject, resulting in higher cracking speed
- WPA/WPA2 cracking: In the potfile, replace password with PMK in order to detect already cracked networks across all WPA modes
##
## Technical
##
- Binary Distribution: Removed 32 bit binary executables
- Building: On macOS, switch from ar to /usr/bin/ar to improve building compatibility
- Building: Skipping Travis/Appveyor build for non-code changes
- Codebase: Cleanup of many unused rc_* variables
- Codebase: Fixed some printf() format arguments
- Codebase: Fixed some type casting to avoid truncLongCastAssignment warnings
- Codebase: Moved hc_* file functions from shared.c to filehandling.c
- Codebase: Ran through a bunch of clang-tidy checkers and updated code accordingly
- Codebase: Remove redundant calls to fclose()
- Dependencies: Updated LZMA-Headers from 18.05 to 19.00
- Dependencies: Updated OpenCL-Headers to latest version from GitHub master repository
- Hash-mode 1460 (HMAC-SHA256 (key = $salt)): Allow up to 64 byte of salt
- Hash-Mode 1680x (WPA-PMKID) specific: Changed separator character from '*' to ':'
- Hash-Mode 8300 (DNSSEC (NSEC3)) specific: Allow empty salt
- Keep Guessing: No longer automatically activate --keep-guessing for modes 9720, 9820, 14900 and 18100
- Kernel Cache: Reactivate OpenCL runtime specific kernel caches
- Kernel Compile: Removed -cl-std= from all kernel build options since we're compatible to all OpenCL versions
- Mode 16800/16801 hash format: Changed separator character from '*' to ':'
- Requirements: Update runtime check for minimum NVIDIA driver version from 367.x to 418.56 or later
- Requirements: Add new requirement for NVIDIA GPU: CUDA Toolkit (10.1 or later)
- OpenCL Kernels: Fix OpenCL compiler warning on double precision constants
- OpenCL Options: Removed --opencl-platforms filter in order to force backend device numbers to stay constant
- Parsers: switched from strtok() to strtok_r() for thread safety
- Requirements: Add new requirement for NVIDIA GPU: CUDA Toolkit (10.1 or later)
- Requirements: Update runtime check for minimum NVIDIA driver version from 367.x to 418.56 or later
- Test Script: Switched from /bin/bash to generic /bin/sh and updated code accordingly
* changes v5.0.0 -> v5.1.0

@ -18,20 +18,16 @@ Philipp "philsmd" Schmidt <philsmd@hashcat.net> (@philsmd)
Gabriele "matrix" Gristina <matrix@hashcat.net> (@gm4tr1x)
* gzip wordlists feature
* SHA-224 kernel module + optimizations
* Some AES OpenCL kernel module optimizations
* Multiple kernel modules
* Compressed wordlist feature
* OpenCL Info feature
* Apple macOS port
* Hardware monitor initial code base
* Test suite initial code base
* Makefiles initial code base
* Makefile initial code base
* Multithreading initial code base
* MultiGPU initial code base
* Benchmarks initial code base
* Offline OpenCL Kernel Compiler support
* SHA-512 initial code base
* Some MD5 and SHA1 OpenCL optimizations
Jean-Christophe "Fist0urs" Delaunay <jean-christophe.delaunay@synacktiv.com> (@Fist0urs)
@ -49,7 +45,6 @@ Jeremi "epixoip" Gosney <jgosney@terahash.com> (@jmgosney)
* Moderating the hashcat forum
* Helping tons of new users find their way into the hashcat universe
Other contributors to hashcat
* A full list and their commits can be found here:
@ -83,4 +78,3 @@ Brandon Chalk <brandon@casaba.com> (@brandoncasaba)
* Kerberos Pre-Auth 17/18 kernel module, ported from @Fist0urs TGS kernel modules
!!! All the package maintainer of hashcat !!!

@ -0,0 +1,303 @@
## The hashcat brain ##
This feature will have a significant impact on the art of password cracking - either cracking alone, in small teams over a local network, or in large teams over the Internet.
From a technical perspective, the hashcat brain consists of two in-memory databases called "long-term" and "short-term". When I realized that the human brain also has such a long-term and a short-term memory, that's when I chose to name this feature the "hashcat brain". No worries, you don't need to understand artificial intelligence (AI) here - we are simply talking about the "memory features" of the human brain.
Put simply, the hashcat brain persistently remembers the attacks you've executed against a particular hashlist in the past ... but on a low level.
Hashcat will check each password candidate against the "brain" to find out if that candidate was already checked in the past and then accept it or reject it. The brain will check each candidate for existence in both the long-term and short-term memory areas. The nice thing is that it does not matter which attack-mode originally was used - it can be straight attack, mask attack or any of the advanced future generators.
The brain computes a hash (a very fast one called xxHash) of every password candidate and store it in the short-term memory first. Hashcat then starts cracking the usual way. Once it's done cracking, it sends a "commit" signal to the hashcat brain, which then moves the candidates from the short-term memory into the long-term memory.
The hashcat brain feature uses a client/server architecture. That means that the hashcat brain itself is actually a network server. I know, I know - you don't want any network sockets in your hashcat process? No problem, then disable the feature in the makefile by setting ENABLE_BRAIN=0 and it will be gone forever.
It's a network server for a reason. This way we can run multiple hashcat clients ... all using the same hashcat brain. This is great for collaboration with many people involved - plus it stays alive after the client shuts down. (Note, however, that even if you want to only use brain functionality locally, you must run two separate instances of hashcat - one to be the brain server, and one to be the client and perform attacks).
That's it from the technical perspective. It's hard to explain how much potential there is in this, and I'm wondering why I didn't invent this sooner. Maybe it took the Crack Me If You Can password-cracking challenge to realize that we need a feature like this.
Before you try it out yourself, let me show you a few examples.
### Example 1: Duplicate candidates all around us ###
There's no doubt that rule-based attacks are the greatest general purpose attack-modifier on an existing wordlist. But they have a little-known problem: They produce a lot of duplicate candidates. While this is not relevant for fast hashes, it has a large impact on slow hashes.
In this example, we apply best64.rule to example.dict, and writes the result to test.txt:
```
$ ./hashcat --stdout example.dict -r rules/best64.rule -o test.txt
```
Now we can see how many candidates were produced:
```
$ cat test.txt | wc -l
9888032
```
And now, let's see how many unique candidates are inside:
```
$ sort -u test.txt | wc -l
7508620
```
Of course, the wordlist and rules used have a large impact on the number of duplicates. In our example - a common wordlist and general purpose rule - the average ratio of produced dupes seems to be around 25%. And all of these dupes are detected by the brain:
```
$ ./hashcat -z example0.hash example.dict -r rules/best64.rule
...
Rejected.........: 2379391/9888032 (24.06%)
```
Note:
Hashcat brain rejects dynamically created duplicate candidates
Average dynamically created duplicate candidates is around 25%
Eliminating the duplicate 25% reduces the attack time by 25%
### Example 2: stop caring about what you've done in the past ###
Think of this: you have a single hash, but it is very high profile. You can use all of your resources. You start cracking - nothing. You try a different attack - still nothing. You're frustrated, but you must continue.. So try more attacks ... but even after two or more days - nothing. You start wondering what you've already done, but you're starting to lose track, getting tired, and making mistakes. Guess what? The hashcat brain comes to the rescue! Here's an attack that you've tried:
```
$ ./hashcat -z -m 6211 hashcat_ripemd160_aes.tc rockyou.txt
...
Time.Started.....: xxx (32 mins, 6 secs)
```
Note that the way you use hashcat doesn't change at all. The hash mode and attack mode can be replaced with anything you'd like. The only difference in your attack is that you add the new -z option to enable hashcat's new brain "client" functionality. By using -z you will also automatically enable the use of "slow candidates" -S mode.
Now let's say that two days later, you forgot that you already performed the attack before. Or maybe it wasn't you who forgot, it's just your coworker on a different machine also trying. This is what happens:
```
$ ./hashcat -z -m 6211 hashcat_ripemd160_aes.tc rockyou.txt
...
Rejected.........: 14344384/14344384 (100.00%)
Time.Started.....: xxx (15 secs)
```
The hashcat brain correctly rejected *all* of the candidates.
Important things to note here:
The rejected count exactly matches the keyspace.
The attack took a bit of time - it's not 0 seconds. The process is not completely without cost. The client must hash all of the candidates, and transfer them to the hashcat brain; the hashcat brain must then search for those candidates in both memory regions, and send back a reject list; and then hashcat must select new candidates to fill the reject gaps, and so on ...
Most important: 15 seconds is less than 32 minutes
### Example 3: It's the candidates that matter, not the attack ###
As I've stated above, it's not the command line that is stored somehow - it's not high level storage in this mode. This is where the hashcat brain server starts to create a strong advantage over manual (even organized) selection of attacks, because of the overlaps that naturally occur when carrying out a variety of attacks:
```
$ ./hashcat -z -m 6211 hashcat_ripemd160_aes.tc -a 3 ?d?d?d?d
...
Rejected.........: 6359/10000 (63.59%)
```
So what happened here? It rejected 63.59% of a mask? Yes, it did. The reason is this:
```
$ grep -c '^[0123456789]\{4\}$' rockyou.txt
6359
```
Notes:
The previous command from the second example kicks in here. In the rockyou wordlist, we have 6359 pure digits with length 4 and the hashcat brain was able to reject them - because the mask ?d?d?d?d will also produce them
The hashcat brain does not care about your attack mode. Actually, you could say that the hashcat brain creates a kind of dynamic cross attack-mode while you are using it. As you can see here, attack-mode 0 and attack-mode 3 work together.
The hashcat brain does not end after hashcat finishes - it stays intact because it's a stand-alone process
### Example 4: Improve on what you've done in the past ###
So you're out of ideas, and you start to run some simple brute-force. But you're clever, because you know the target tends to use the symbol "$" somewhere inside the password, and you optimize your mask for this. Let's start with an example not using the hashcat brain:
```
$ ./hashcat -m 6211 hashcat_ripemd160_aes.tc -a 3 -1 ?l?d$ ?1?1?1?1?1?1
...
Time.Started.....: xxx (5 hours, 37 mins)
Progress.........: 2565726409/2565726409 (100.00%)
```
Damn - it did not crack. But then your coworker shows up and tells you that he found out that the target isn't just using the "$" symbol in his passwords, but also the "!" symbol. Damn, this makes your previous run (which took 5.5 hours) completely useless - wasted! You now need even more time for the correct run:
```
$ ./hashcat -m 6211 hashcat_ripemd160_aes.tc -a 3 -1 ?l?d$! ?1?1?1?1?1?1
...
Time.Started.....: xxx (6 hours, 39 mins)
Progress.........: 3010936384/3010936384 (100.00%)
```
Now we do the same again, but with hashcat brain enabled. All of the work of that first command will no longer be wasted. The same commandline history, but this time with hashcat brain enabled, looks like this:
```
$ ./hashcat -z -m 6211 hashcat_ripemd160_aes.tc -a 3 -1 ?l?d$ ?1?1?1?1?1?1
...
Time.Started.....: xxx (5 hours, 37 mins)
```
But now, if we add the "!" character, we see the difference:
```
$ ./hashcat -z -m 6211 hashcat_ripemd160_aes.tc -a 3 -1 ?l?d$! ?1?1?1?1?1?1
...
Time.Started.....: xxx (1 hour, 5 mins)
```
So you can see here how the hashcat brain helps you to reduce the time for the second attack, from ~6 hours to ~1 hour.
### Example 5: The resurrection of the random rules ###
Random rules and salts? No way! Take a look at this, it's horrible:
```
$ cat wordlist.txt
password
$ ./hashcat wordlist.txt --stdout -g 100000 | sort -u | wc -l
20473
```
What I'm trying to show here is how inefficient the random rules actually are (and always have been). They produce tons of duplicate work.
As you can see from the above example, only 20473 of 100000 tested passwords of the produced random candidates are unique - and the remaining 80% is just wasted time.
I cannot believe that I've never thought about this in detail, but now the hashcat brain brings this to an end:
```
./hashcat -z hashlist.txt wordlist.txt -g 100000
...
Rejected.........: 82093/100000 (82.09%)
```
This alone gives -g a new role in password cracking. If you've ever attended a password cracking contest, you know how important it is to find the patterns that were used to generate the password candidates. Because finding new patterns using the combination of random-rules and debug-rules is a very efficient way to find new attack vectors.
For example, Team Hashcat managed to crack 188k/300k of the SSHA hashlist from the 2018 CMIYC contest - a strong showing. But with random rules, there's a really good chance that you'll discover what you missed. Here's an example of an attack I ran for only few minutes while writing this document:
```
$ ./hashcat -z -m 111 c0_111.list.txt wordlist.txt -g 100000 --debug-mode 4
...
INFO: Removed 188292 hashes found in potfile.
...
time:Z4 R3:tim2eeee
sexual:Y3 Z5 O35:sexllllll
poodle:Y2 T3 sBh:pooDlele
pass123:C z5:ppppppASS123
pool:y4 Z2 Y1:poolpoollll
profit:o8F ^_:_profit
smashing:Z3:smashingggg
```
These are real passwords that Team Hashcat didn't crack during the contest. What matters here is that you can see hints for possible patterns - which counts much more than just cracking a single password. And if you run the exact same command again, hashcat will generate different rules and you get more cracks, and discover more new patterns. You can do this again and again. We call this technique "raking".
Note: It can occur that a pattern discovered from random rules matches an already known pattern. In such a case, it's a strong sign that this pattern may have been searched already, but has not yet been searched exhaustively. Perhaps a previous attack was stopped too early. But with the hashcat brain, that's no longer important - we can just apply the pattern without any worry about creating double work.
## The costs of hashcat brain ##
It should now be clear now what the potential is here. There are many other examples where this feature really kicks in, but I'm sure you already have your own ideas.
Of course, the hashcat brain does not come for free - there are limitations. It's important to know some key numbers to decide when to use it (and when not to).
Each password candidate creates a hash of 8 bytes that has to be transferred, looked up and stored in the hashcat brain. This brings us to the first question: What kind of hardware do you need? Fortunately, this is pretty easy to calculate. If you have a server with 64 GB of physical memory, then you can store 8,000,000,000 candidates. I guess that's the typical size of every serious password cracker's wordlist; if you have more, you typically have too much trash in your wordlists. If you have less, then you just haven't been collecting them long enough.
So let's assume a candidate list size of 8,000,000,000. That doesn't sound like too much - especially if you want to work with rules and masks. It should be clear that using the hashcat brain against a raw MD5 is not very efficient. But now things become interesting, because of some unexpected effects that kick in.
Imagine you have a salted MD5 list, let's say VBULL which is a fast hash (not a slow hash) - and you have many of them. In thise case, each of the salts starts to work for us.
Yes, you read that right - the more salts, the better!!
Let's continue with our calculation and our 8,000,000,000 password example. The speed of a typical VBULL on a Vega64 is 2170.6 MH/s. If we have 300,000 salts, the speed drops to 7235 H/s. Now to feed the hashcat brain at a rate of 7235 H/s, it will take you 1,105,736 seconds (or 12 days). That means you can run the hashcat brain for 12 days. It's an OK time I think, though I don't let many attacks run for such a long time. Also, this is an inexpensive server with 64GB physical RAM, and you could simply add more RAM, right? At this point we should also consider using swap memory. I think there's actually room for that - but I leave testing this to our users.
Lookup times are pretty good. The hashcat brain uses two binary trees, which means that the more hashes that are added, the more efficient it becomes. Of course, the lookup times will increase drastically in the first moments, but will stabilize at some point. Note that we typically do not compare just one entry vs. million of entries - we compare hundreds of thousands of entries vs. millions of entries.
## Technical details on the hashcat brain server ##
* The hashcat brain server saves the long-term memory to disk every 5 minutes automatically
* The server also saves the long-term memory if the hashcat brain server is killed using Ctrl-C
* There's no mitigation against database poisoning - this would cost too many resources
* There's currently no mitigation against an evil client requesting the server to allocate too much memory
* Make sure your hashcat brain server is protected with a good password, because you have to trust your clients
* I'll add a standalone hashcat brain seeding tool later which enables you to easily push all the words from an entire wordlist or a mask very fast. At this time you can use the --hashcat-session option to do so with hashcat itself
* You can use --brain-server-whitelist in order to force the clients to use a specific hashlist
* The protocol used is pretty simple and does not contain hashcat specific information, which should make it possible for other cracking tools to utilize the server, too
## Technical details on the hashcat brain client ##
The client calculates the hashcat brain session based on the hashlist entries, to efficiently let a high number of salts work for us. You can override the session calculated with --brain-session, which makes sense if you want to use a fast hash in order to "seed" the hashcat brain with already-tried wordlists or masks.
The use of --remove is forbidden, but this should not really be a problem, since the potfile will do the same for you. Make sure to remove --potfile-disable in case you use it.
If multiple clients use the same attack on the same hashcat brain (which is a clever idea), you end up with a distributed solution - without the need of an overlay for keyspace distribution. This is not the intended use of the hashcat brain and should not be used as it. I'll explain later.
Since each password candidate is creating a hash of 8 bytes, some serious network upstream traffic can be generated from your client. I'll explain later.
The use of xxHash as hash is not required; we can exchange it with whatever hash we want. However so far it's doing a great job.
The status view was updated to give you some real-time statistics about the network usage:
```
Speed.#1.........: 0 H/s (0.00ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#2.........: 0 H/s (0.00ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#3.........: 0 H/s (0.00ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#4.........: 0 H/s (0.00ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
...
Brain.Link.#1....: RX: 0 B (0.00 Mbps), TX: 4.1 MB (1.22 Mbps), sending
Brain.Link.#2....: RX: 0 B (0.00 Mbps), TX: 4.7 MB (1.09 Mbps), sending
Brain.Link.#3....: RX: 0 B (0.00 Mbps), TX: 3.5 MB (0.88 Mbps), sending
Brain.Link.#4....: RX: 0 B (0.00 Mbps), TX: 4.1 MB (0.69 Mbps), sending
```
When the data is transferred, there's no cracking. You can see it's doing 0 H/s. But if you have a slow hash, or a fast hash with multiple salts, this time can be seen as minor overhead. The major time taken is still in the cracking phase. So if you have a fast hash, the more salts the better! As soon as hashcat is done with the network communication, it's starting to work as always:
```
Speed.#1.........: 869.1 MH/s (1.36ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#2.........: 870.8 MH/s (1.36ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#3.........: 876.2 MH/s (1.36ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
Speed.#4.........: 872.6 MH/s (1.36ms) @ Accel:64 Loops:1 Thr:1024 Vec:1
...
Brain.Link.#1....: RX: 1.3 MB (0.00 Mbps), TX: 10.5 MB (0.00 Mbps), idle
Brain.Link.#2....: RX: 1.3 MB (0.00 Mbps), TX: 10.5 MB (0.00 Mbps), idle
Brain.Link.#3....: RX: 1.3 MB (0.00 Mbps), TX: 10.5 MB (0.00 Mbps), idle
Brain.Link.#4....: RX: 1.3 MB (0.00 Mbps), TX: 10.5 MB (0.00 Mbps), idle
```
## The brain and the bottlenecks ##
While working with Team Hashcat to test how the brain performs with large numbers of clients and over the Internet, I learned about some serious bottlenecks.
The most important insight was about the performance of lookups against the brain. That should be obvious solely from the huge amount of data that we're talking about here, but the brain does not just have to look up millions of candidates against millions of existing database entries - it must also insert them into the database after each commit and ensure the ordering stays intact otherwise it would break the binary tree. This simply takes time, even if the lookup process was already threaded. But the feature was so promising that I did not want to abandon development just because of the performance challenge.
But to start from the beginning, keep the following number in mind: 50kH/s
This was the speed that was the maximum performance of the hashcat brain after the first development alpha was finished. In other words, if the performance of your attack was faster than this speed, the hashcat brain becomes the bottleneck. Now there's good and bad news about this:
Bad: This is the total number. Which means, the entire network of all GPUs participating as clients cannot create more than 50kH/s before the bottleneck effect kicks in.
Good: Salts come to the rescue. If you have a large salted hashlist - with, for example 300,000 SSHA1 hashes (as in the last Crack Me If You Can) - this means that the real maximum performance that the brain can handle jumps to 15 GH/s. (You can simply multiply the 50kH/s with the number of unique salts of your hashlist.)
Then there's another bottleneck: the network bandwidth required. For those of you who plan to use the brain inside a local network with 100Mbit, you can skip this section entirely. But for those who plan to use the brain in a large group, over VPN or in general over the Internet, keep in mind that a single GPU can create around 5Mbit/s of upstream before bandwidth becomes a bottleneck. That doesn't mean that a hashcat client will stop working - it will just reduce your theoretical maximum cracking performance.
Both of these lessons learned lead to an upgrade to the brain during development. (This means that everything that you've read up to this point is already outdated!) The bottlenecks still exist, but there's kind of a mitigation to them. To better understand what we mean when talking about how to mitigate the problem, we need new terminology in the hashcat brain universe - something we'll call brain client "features".
When running as a client, hashcat now has a new parameter called --brain-client-features. With this parameter, you can select from two features (so far) that the client has to offer:
Brain "Hashes" feature
Brain "Attacks" feature
The brain "hashes" feature is everything that we've explained from the beginning - the *low-level* function of the brain. The brain "attacks" feature is the *high-level* strategy which I added to mitigate the bottlenecks. By default, both "features" are active, and run in parallel. Depending on your use case, you can selectively enable or disable either one.
The brain "attack" feature should be explained in more detail in order to understand what it is doing. It is a high-level approach, or a compressed hint. Hashcat clients request this "hint" from the brain about a given attack as soon as the client is assigned a new work package from the local hashcat dispatcher. For example, if you have a system with 4 GPUs, the local hashcat dispatcher is responsible for distributing the workload across the local GPUs. What's new is that before a GPU starts actually working on the package, it asks the brain for a high level confirmation of whether or not to proceed. The process of how this work is basically the same as with the low-level architecture: the client "reserves" a package when the hashcat brain moves it to short-term memory - and once it is done, it will be moved to long-term memory.
The attack package itself is another 8-byte checksum - but that's more than enough to assign all feasible combinations of attacks a unique identifier. For example, hashcat takes options like the attack mode itself, rules with -r (but also -j and -k rules), masks, user-defined custom charset, Markov options, a checksum of the wordlists (if used) and so on. All of these options are combined in a repeatable way, and from that unique combination of options, a checksum is created that uniquely "fingerprints" all of the components of the attack.
When the clients connect to the hashcat brain, they send this attack checksum (along with the session ID) to the brain, so that the brain knows precisely which attack is running on a particular hashcat client. Now, if the local dispatcher creates a new package, the local start point and end point of this attack is sent to the brain so that the brain can track it. The client will automatically reject an entire package - for example, an entire wordlist, or an entire wordlist plus a specific list of rules - if the attack has some overlaps. This is done *before* the client sends any password candidate hashes to the brain. This means that if a package is rejected:
The client doesn't need to transfer the hashes (which mitigates the bandwidth bottleneck)
The brain server doesn't need to compare it (which mitigates the lookup bottleneck)
If the attack package itself is not rejected, the hashes are still sent to the brain and compared.
The hashcat brain is kind of clever when it comes to the packages. It recognizes overlapping packages on a low level - in cases where only part of one package overlaps with another package. When this occurs, the brain only rejects the overlapping section of the package and informs the client about that. It is then up to the client to decide whether it wants to either launch the attack with a minimized package size, or to ask the local dispatcher for another (smaller) portion to fill the gap. Of course, this newly creates portion is also first sent to the brain, in case it can be rejected. The entire process is packed into a loop and it will repeat the process until the client decides that the package is big enough (and the default setting for accepting a package and to start executing is half of the original package size.)
Something I realized - after I had already finished with the implementation of the high-level feature - was that the new brain "attack" feature is a very strong feature for standalone use. By setting --brain-client-features 2, you tell the client to only use the attack feature. This completely eliminates all bottlenecks - the network bandwidth, but even more importantly, the lookup bottleneck. The drawback is that you lose cross-attack functionality.
If you think that this new feature is a nice way to get a native hashcat multi-system distribution ... you are wrong. The brain client still requires running in -S mode, which means that this is all about slow hashes or fast hashes with many salts. There's also no wordlist distribution, and most importantly, there's no distribution of cracked hashes across all network clients. So the brain "attack" feature is not meant to be an alternative to existing distribution solutions, but just as a mitigation for the bottlenecks (and it works exactly as such).
## Commandline Options ##
Most of the commands are self-explaining. I'm just adding them here to inform you which ones exist:
- `--brain-server` to start a hashcat brain server
- `--brain-client` to start a hashcat brain client, automatically activates --slow-candidates
- `--brain-host` and `--brain-port` to specify ip and port of brain server, both listening and connecting
- `--brain-session` to override automatically calculated brain session ID
- `--brain-session-whitelist` to allow only explicit written session ID on brain server
- `--brain-password` to specify the brain server authentication password
- `--brain-client-features` which allows enable and disable certain features of the hashcat brain

@ -0,0 +1,19 @@
## Some notes about the --keyboard-layout-mapping feature ##
This new configuration item was added to handle a special TrueCrypt and VeraCrypt "feature" which is automatically active during the setup of encryption for a system partition or an entire system drive. Due to BIOS requirements, the user's keyboard layout is always set to the US keyboard layout during the pre-boot stage (no matter which layout is actually in use). In other words, in the pre-boot stage, when TC/VC asks the user to enter the password, the layout is actually set to the US keyboard layout.
To avoid conflicts with the real keyboard layout configured in the OS, both TC and VC have a little trick: they set the OS keyboard layout to US keyboard layout while the password prompt window is opened. You can actually verify this in the language task bar while the password prompt window is open. It will switch from whatever is configured to English, and after the window is closed, the original keyboard layout is restored.
This has a serious impact on cracking the password. For example, my German keyboard layout is a "QWERTZ" keyboard layout. The US keyboard, however used a "QWERTY" layout. The difference is that the position of the "y" and "z" letter is exchanged. If it was just that, this wouldn't be much of a problem - but almost all the special symbols are mapped very differently. (I won't go into the details; you might want to compare it yourself for fun.)
And when it comes to non-Latin based languages, this behaviour gets completely out of control. Just one example: If the user enters the password بين التخصصات (interdisciplinary) on an Arabic keyboard, the password we need to guess is: fdk hgjowwhj[g.
To deal with all of this, a hashcat user needs to know exactly which keyboard was enabled when the password was entered into the password window during setup. For German, I've added an example keyboard layout to the newly created folder "layouts", which now ships with the binary and on GitHub master. For instance, if you know a German keyboard was used, you can now add "--keyboard-layout-mapping layouts/de.hckmap" to the commandline.
Unfortunately, since I don't own all of the existing keyboards, it will be necessary for hashcat users to contribute the rest of the missing mapping tables - ideally, as a GitHub PR. Almost every language I know has special keyboard layouts. There's even a difference between the UK and US layouts.
Here's how you can help. To create a language-specific mapping table, open a text editor, and press every key on the keyboard, starting from the top left to the top right. Press Enter after every key. Use only keys which represent a real character, and ignore control keys such as Backspace, Caps Lock, etc. Then move to the next row below and repeat the process from the left to the right, and so on until you reach the space character. At that point, repeat exactly the same sequence, but with Shift pressed. When done, add a Tab after each character (Tab is used as separator character). Then switch the keyboard layout to English and repeat the entire process exactly in the same order, adding each character after the tab character. Hashcat fully supports all multibyte characters up to 32 bits on both sides of the mapping table (even if the right side side will be always a single byte character). As an example, see the layouts/de.hckmap file.
Note that when it comes to Alt/AltGr, this behavior is exploitable. TC/VC does not accept those modifier keys. If a user uses AltGr while entering the password, a window appears that tells the user that the use of this key is not allowed. For instance, on my German keyboard layout, I need to use AltGr+q to get the "@" character. As a consequence of this, we know that the TC/VC password cannot include any of the characters ("@", "[", "]", "\", "€", "|", "{", "}", "~") if the user was using a German keyboard to enter the password.
At the same time, we can guarantee that "@" will never be listed on the left side of the mapping table - because the only characters that can appear there are the ones that are are reachable only without any modifier or by using shift (but not AltGr). If we combine these concepts, we could add some code to reject all passwords which contain at least one character not listed in a mapping table. This is not yet implemented - but I'll add it if hashcat users agree that there is value in it.

@ -3,6 +3,8 @@ Performance Notes:
- Always Use the latest display driver
- To significantly improve performance use -w 3
- Cracking with a mask with a fixed prefix can be slow.
Consider instead using a mode with salt as a prefix and set salt to that fixed prefix.
Also read this:

@ -1,3 +1,12 @@
_____: _____________ _____: -aTZ! _______ ____
_\ |__\_______ _/_______ _\ |_____ _______\______ /__ ______
| _ | __ \ ____/____ _ | ___/____ __ |_______/
|: | .| \ _\____ / | .| \ / \ :| |
|_____| :|______/ / //____| :|___ //_________| :|
|_____| /___________/ |_____| /_____/ /_______|
: : :
hashcat v5.1.0
==============
@ -40,7 +49,6 @@ NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 o
- MD4
- MD5
- Half MD5
- SHA1
- SHA2-224
- SHA2-256
@ -50,48 +58,60 @@ NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 o
- SHA3-256
- SHA3-384
- SHA3-512
- RIPEMD-160
- BLAKE2b-512
- GOST R 34.11-2012 (Streebog) 256-bit, big-endian
- GOST R 34.11-2012 (Streebog) 512-bit, big-endian
- GOST R 34.11-94
- Half MD5
- Java Object hashCode()
- Keccak-224
- Keccak-256
- Keccak-384
- Keccak-512
- BLAKE2b-512
- SipHash
- RIPEMD-160
- sha256(sha256_bin(pass))
- Whirlpool
- GOST R 34.11-94
- GOST R 34.11-2012 (Streebog) 256-bit
- GOST R 34.11-2012 (Streebog) 512-bit
- Java Object hashCode()
- SipHash
- BitShares v0.x - sha512(sha512_bin(pass))
- md5($pass.$salt)
- md5($salt.$pass)
- md5(utf16le($pass).$salt)
- md5($salt.utf16le($pass))
- md5($salt.$pass.$salt)
- md5($salt.md5($pass))
- md5($salt.md5($salt.$pass))
- md5($salt.md5($pass.$salt))
- md5($salt.md5($salt.$pass))
- md5($salt.sha1($salt.$pass))
- md5($salt.utf16le($pass))
- md5(md5($pass))
- md5(md5($pass).md5($salt))
- md5(strtoupper(md5($pass)))
- md5(sha1($pass))
- md5(sha1($pass).md5($pass).sha1($pass))
- md5(sha1($salt).md5($pass))
- md5(strtoupper(md5($pass)))
- md5(utf16le($pass).$salt)
- sha1($pass.$salt)
- sha1($salt.$pass)
- sha1(utf16le($pass).$salt)
- sha1($salt.utf16le($pass))
- sha1(sha1($pass))
- sha1($salt.$pass.$salt)
- sha1($salt.sha1($pass))
- sha1($salt.utf16le($pass))
- sha1($salt1.$pass.$salt2)
- sha1(CX)
- sha1(md5($pass))
- sha1(md5($pass).$salt)
- sha1(md5($pass.$salt))
- sha1(md5(md5($pass)))
- sha1($salt.$pass.$salt)
- sha1(CX)
- sha1(sha1($pass))
- sha1(utf16le($pass).$salt)
- sha256($pass.$salt)
- sha256($salt.$pass)
- sha256(utf16le($pass).$salt)
- sha256($salt.utf16le($pass))
- sha256(md5($pass))
- sha256(sha256($pass).$salt)
- sha256(utf16le($pass).$salt)
- sha512($pass.$salt)
- sha512($salt.$pass)
- sha512(utf16le($pass).$salt)
- sha512($salt.utf16le($pass))
- sha512(utf16le($pass).$salt)
- Ruby on Rails Restful-Authentication
- HMAC-MD5 (key = $pass)
- HMAC-MD5 (key = $salt)
- HMAC-SHA1 (key = $pass)
@ -100,182 +120,201 @@ NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 o
- HMAC-SHA256 (key = $salt)
- HMAC-SHA512 (key = $pass)
- HMAC-SHA512 (key = $salt)
- DES (PT = $salt, key = $pass)
- HMAC-Streebog-256 (key = $pass), big-endian
- HMAC-Streebog-256 (key = $salt), big-endian
- HMAC-Streebog-512 (key = $pass), big-endian
- HMAC-Streebog-512 (key = $salt), big-endian
- CRC32
- 3DES (PT = $salt, key = $pass)
- Skip32 (PT = $salt, key = $pass)
- DES (PT = $salt, key = $pass)
- ChaCha20
- phpass
- scrypt
- Skip32 (PT = $salt, key = $pass)
- PBKDF2-HMAC-MD5
- PBKDF2-HMAC-SHA1
- PBKDF2-HMAC-SHA256
- PBKDF2-HMAC-SHA512
- Python passlib pbkdf2-sha1
- Python passlib pbkdf2-sha256
- scrypt
- phpass
- Ansible Vault
- Atlassian (PBKDF2-HMAC-SHA1)
- Python passlib pbkdf2-sha512
- Skype
- Python passlib pbkdf2-sha256
- Python passlib pbkdf2-sha1
- TACACS+
- SIP digest authentication (MD5)
- IKE-PSK MD5
- IKE-PSK SHA1
- WPA-EAPOL-PBKDF2
- WPA-EAPOL-PMK
- WPA-PMKID-PBKDF2
- WPA-PMKID-PMK
- iSCSI CHAP authentication, MD5(CHAP)
- IKE-PSK MD5
- IKE-PSK SHA1
- NetNTLMv1
- NetNTLMv1+ESS
- NetNTLMv2
- IPMI2 RAKP HMAC-SHA1
- Kerberos 5 AS-REQ Pre-Auth etype 23
- Kerberos 5 AS-REP etype 23
- Kerberos 5 TGS-REP etype 23 (RC4-HMAC-MD5)
- Kerberos 5 Pre-Auth etype 17/18 (AES128-CTS-HMAC-SHA1-96)
- Kerberos 5 TGS-REP etype 17/18 (AES128-CTS-HMAC-SHA1-96)
- DNSSEC (NSEC3)
- CRAM-MD5
- iSCSI CHAP authentication, MD5(CHAP)
- JWT (JSON Web Token)
- Kerberos 5, etype 23, AS-REQ Pre-Auth
- Kerberos 5, etype 23, TGS-REP
- Kerberos 5, etype 23, AS-REP
- Kerberos 5, etype 17, TGS-REP
- Kerberos 5, etype 18, TGS-REP
- Kerberos 5, etype 17, Pre-Auth
- Kerberos 5, etype 18, Pre-Auth
- NetNTLMv1 / NetNTLMv1+ESS
- NetNTLMv2
- Skype
- PostgreSQL CRAM (MD5)
- MySQL CRAM (SHA1)
- SIP digest authentication (MD5)
- TACACS+
- JWT (JSON Web Token)
- SMF (Simple Machines Forum) > v1.1
- phpBB3 (MD5)
- vBulletin < v3.8.5
- vBulletin >= v3.8.5
- MyBB 1.2+
- IPB2+ (Invision Power Board)
- WBB3 (Woltlab Burning Board)
- Joomla < 2.5.18
- Joomla >= 2.5.18 (MD5)
- WordPress (MD5)
- PHPS
- Drupal7
- osCommerce
- xt:Commerce
- PrestaShop
- Django (SHA-1)
- Django (PBKDF2-SHA256)
- Tripcode
- MediaWiki B type
- OpenCart
- Redmine
- PunBB
- Atlassian (PBKDF2-HMAC-SHA1)
- PostgreSQL
- MSSQL (2000)
- MSSQL (2005)
- MSSQL (2012, 2014)
- MySQL323
- MySQL4.1/MySQL5
- Oracle H: Type (Oracle 7+)
- Oracle S: Type (Oracle 11+)
- Oracle T: Type (Oracle 12+)
- Sybase ASE
- Episerver 6.x < .NET 4
- Episerver 6.x >= .NET 4
- Apache $apr1$ MD5, md5apr1, MD5 (APR)
- ColdFusion 10+
- hMailServer
- nsldap, SHA-1(Base64), Netscape LDAP SHA
- nsldaps, SSHA-1(Base64), Netscape LDAP SSHA
- SSHA-256(Base64), LDAP {SSHA256}
- SSHA-512(Base64), LDAP {SSHA512}
- CRAM-MD5 Dovecot
- FileZilla Server >= 0.9.55
- CRC32
- RACF
- AIX {smd5}
- AIX {ssha1}
- AIX {ssha256}
- AIX {ssha512}
- LM
- NTLM
- Domain Cached Credentials (DCC), MS Cache
- Domain Cached Credentials 2 (DCC2), MS Cache 2
- QNX /etc/shadow (MD5)
- QNX /etc/shadow (SHA256)
- QNX /etc/shadow (SHA512)
- DPAPI masterkey file v1
- DPAPI masterkey file v2
- MS-AzureSync PBKDF2-HMAC-SHA256
- descrypt, DES (Unix), Traditional DES
- GRUB 2
- MS-AzureSync PBKDF2-HMAC-SHA256
- BSDi Crypt, Extended DES
- md5crypt, MD5 (Unix), Cisco-IOS $1$ (MD5)
- NTLM
- macOS v10.4, macOS v10.5, MacOS v10.6
- macOS v10.7
- macOS v10.8+ (PBKDF2-SHA512)
- Radmin2
- Samsung Android Password/PIN
- bcrypt $2*$, Blowfish (Unix)
- md5crypt, MD5 (Unix), Cisco-IOS $1$ (MD5)
- descrypt, DES (Unix), Traditional DES
- sha256crypt $5$, SHA256 (Unix)
- sha512crypt $6$, SHA512 (Unix)
- macOS v10.4, MacOS v10.5, MacOS v10.6
- macOS v10.7
- macOS v10.8+ (PBKDF2-SHA512)
- AIX {smd5}
- AIX {ssha1}
- AIX {ssha256}
- AIX {ssha512}
- Cisco-PIX MD5
- Windows Phone 8+ PIN/password
- Cisco-ASA MD5
- Cisco-IOS $1$ (MD5)
- Cisco-IOS type 4 (SHA256)
- Cisco-IOS $8$ (PBKDF2-SHA256)
- Cisco-IOS $9$ (scrypt)
- Juniper NetScreen/SSG (ScreenOS)
- Juniper IVE
- Juniper/NetBSD sha1crypt
- FortiGate (FortiOS)
- Samsung Android Password/PIN
- Windows Phone 8+ PIN/password
- Cisco-IOS type 4 (SHA256)
- Cisco-PIX MD5
- Citrix NetScaler
- RACF
- GRUB 2
- Radmin2
- Domain Cached Credentials (DCC), MS Cache
- Domain Cached Credentials 2 (DCC2), MS Cache 2
- FortiGate (FortiOS)
- ArubaOS
- Juniper IVE
- Juniper NetScreen/SSG (ScreenOS)
- Juniper/NetBSD sha1crypt
- MSSQL (2000)
- MSSQL (2005)
- MSSQL (2012, 2014)
- PostgreSQL
- Oracle H: Type (Oracle 7+)
- Oracle S: Type (Oracle 11+)
- Oracle T: Type (Oracle 12+)
- MySQL323
- MySQL4.1/MySQL5
- Sybase ASE
- hMailServer
- DNSSEC (NSEC3)
- CRAM-MD5 Dovecot
- SSHA-256(Base64), LDAP {SSHA256}
- SSHA-512(Base64), LDAP {SSHA512}
- FileZilla Server >= 0.9.55
- ColdFusion 10+
- Apache $apr1$ MD5, md5apr1, MD5 (APR)
- Episerver 6.x < .NET 4
- Episerver 6.x >= .NET 4
- nsldap, SHA-1(Base64), Netscape LDAP SHA
- nsldaps, SSHA-1(Base64), Netscape LDAP SSHA
- SAP CODVN B (BCODE)
- SAP CODVN B (BCODE) from RFC_READ_TABLE
- SAP CODVN F/G (PASSCODE)
- SAP CODVN F/G (PASSCODE) from RFC_READ_TABLE
- SAP CODVN H (PWDSALTEDHASH) iSSHA-1
- PeopleSoft
- PeopleSoft PS_TOKEN
- SolarWinds Orion
- Lotus Notes/Domino 5
- Lotus Notes/Domino 6
- Lotus Notes/Domino 8
- PeopleSoft
- PeopleSoft PS_TOKEN
- Oracle Transportation Management (SHA256)
- 7-Zip
- RAR3-hp
- RAR5
- AxCrypt
- AxCrypt in-memory SHA1
- WinZip
- PKZIP archive encryption
- PKZIP Master Key
- iTunes backup < 10.0
- iTunes backup >= 10.0
- Android Backup
- TrueCrypt
- Android FDE <= 4.3
- Android FDE (Samsung DEK)
- Huawei sha1(md5($pass).$salt)
- AuthMe sha256
- eCryptfs
- VeraCrypt
- LUKS
- DiskCryptor
- VeraCrypt
- FileVault 2
- DiskCryptor
- Android FDE (Samsung DEK)
- Android FDE <= 4.3
- Apple File System (APFS)
- MS Office <= 2003
- MS Office 2007
- MS Office 2010
- MS Office 2013
- TrueCrypt
- PDF 1.1 - 1.3 (Acrobat 2 - 4)
- PDF 1.1 - 1.3 (Acrobat 2 - 4), collider #1
- PDF 1.1 - 1.3 (Acrobat 2 - 4), collider #2
- PDF 1.4 - 1.6 (Acrobat 5 - 8)
- PDF 1.7 Level 3 (Acrobat 9)
- PDF 1.7 Level 8 (Acrobat 10 - 11)
- Apple Secure Notes
- Open Document Format (ODF) 1.1 (SHA-1, Blowfish)
- MS Office 2007
- MS Office 2010
- MS Office 2013
- MS Office <= 2003 $0/$1, MD5 + RC4
- MS Office <= 2003 $0/$1, MD5 + RC4, collider #1
- MS Office <= 2003 $0/$1, MD5 + RC4, collider #2
- MS Office <= 2003 $3/$4, SHA1 + RC4
- MS Office <= 2003 $3, SHA1 + RC4, collider #1
- MS Office <= 2003 $3, SHA1 + RC4, collider #2
- Open Document Format (ODF) 1.2 (SHA-256, AES)
- Open Document Format (ODF) 1.1 (SHA-1, Blowfish)
- Apple Secure Notes
- JKS Java Key Store Private Keys (SHA1)
- 1Password, agilekeychain
- 1Password, cloudkeychain
- Password Safe v2
- Password Safe v3
- LastPass + LastPass sniffed
- 1Password, agilekeychain
- 1Password, cloudkeychain
- KeePass 1 (AES/Twofish) and KeePass 2 (AES)
- Bitcoin/Litecoin wallet.dat
- Electrum Wallet (Salt-Type 1-3)
- Blockchain, My Wallet
- Blockchain, My Wallet, V2
- Blockchain, My Wallet, Second Password (SHA256)
- Electrum Wallet (Salt-Type 1-3)
- KeePass 1 (AES/Twofish) and KeePass 2 (AES)
- JKS Java Key Store Private Keys (SHA1)
- Ethereum Pre-Sale Wallet, PBKDF2-HMAC-SHA256
- Ethereum Wallet, PBKDF2-HMAC-SHA256
- Ethereum Wallet, SCRYPT
- Ethereum Pre-Sale Wallet, PBKDF2-HMAC-SHA256
- Ansible Vault
- Plaintext
- 7-Zip
- RAR3-hp
- RAR5
- PKZIP (Compressed)
- PKZIP (Compressed Multi-File)
- PKZIP (Mixed Multi-File)
- PKZIP (Mixed Multi-File Checksum-Only)
- PKZIP (Uncompressed)
- PKZIP Master Key
- PKZIP Master Key (6 byte optimization)
- iTunes backup < 10.0
- iTunes backup >= 10.0
- WinZip
- Android Backup
- AxCrypt
- AxCrypt in-memory SHA1
- WBB3 (Woltlab Burning Board)
- vBulletin < v3.8.5
- vBulletin >= v3.8.5
- PHPS
- SMF (Simple Machines Forum) > v1.1
- MediaWiki B type
- Redmine
- Django (PBKDF2-SHA256)
- Django (SHA-1)
- Joomla < 2.5.18
- OpenCart
- PrestaShop
- Tripcode
- Drupal7
- osCommerce, xt:Commerce
- PunBB
- MyBB 1.2+, IPB2+ (Invision Power Board)
- TOTP (HMAC-SHA1)
##
## Attack-Modes

@ -0,0 +1,34 @@
## Generic password candidate interface, aka "slow candidates" mode ##
The first goal of this new interface is to allow attachment of advanced password candidate generators in the future (for example hashcat's table attack, kwprocessor, OMEN, PassGAN, PCFG, princeprocessor, etc.). At this time, the only attack modes that have been added are hashcat's straight attack (including rules engine), combinator attack, and mask attack (AKA brute-force with Markov optimizer). You can enable this new general password-candidate interface by using the new -S/--slow-candidates option.
The second goal of the slow candidates engine is to generate password candidates on-host (on CPU). This is useful when attacking large hashlists with fast hashes (but many salts), or generally with slow hashes. Sometimes we cannot fully run large wordlists in combination with rules, because it simply takes too much time. But if we know of a useful pattern that works well with rules, we often want to use rules with a smaller, targeted wordlist instead, in order to exploit the pattern. On GPU, this creates a bottleneck in hashcat's architecture - because hashcat can only assign the words from the wordlist to the GPU compute units.
A common workaround for this is to use a pipe, and feed hashcat to itself. But this traditional piping approach came at a cost - no ETA, no way to easily distribute chunks, etc. It was also completely incompatible with overlays like Hashtopolis. And if piping hashcat to itself isn't feasible for some reason, you quickly run into performance problems with small wordlists and large rulesets.
To demonstrate this, here's an example where you have a very small wordlist with just a single word in the wordlist, but a huge ruleset to exploit some pattern:
```
$ wc -l wordlist.txt
1 wordlist.txt
$ wc -l pattern.rule
99092 pattern.rule
```
Since the total number of candidates is ([number-of-words-from-wordlist] * [number-of-rules]), this attack should theoretically be enough to fully feed all GPU compute units. But in practice, hashcat works differently internally - mostly to deal with fast hashes. This makes the performance of such an attack terrible:
```
$ ./hashcat -m 400 example400.hash wordlist.txt -r pattern.rule --speed-only
...
Speed.#2.........: 145 H/s (0.07ms)
```
This is where slow candidates comes into play. To feed the GPU compute units more efficiently, hashcat applies rules on-host instead, creating a virtual wordlist in memory for fast access. But more importantly from hashcat's perspective, we now have a large wordlist, which allows hashcat to supply all GPU compute units with candidates. Since hashcat still needs to transfer the candidates over PCI-Express, this slows down cracking performance. In exchange, we get a large overall performance increase - multiple times higher, even considering the PCI-Express bottleneck - for both slow hashes and salted fast hashes with many salts,
Here's the exact same attack, but using the new -S option to turn on slow candidates:
```
$ ./hashcat -m 400 example400.hash wordlist.txt -r pattern.rule --speed-only -S
...
Speed.#2.........: 361.3 kH/s (3.54ms)
```

@ -22,6 +22,7 @@ We're a group of people participating in the yearly repeating password cracking
| PCrack | SAINTCON, Utah | 2017 | 1st |
| Crack Me If You Can | DEF CON, Las Vegas | 2018 | 2nd |
| CracktheCon | Cyphercon, Milwaukee | 2019 | 1st |
| Crack Me If You Can | DEF CON, Las Vegas | 2019 | 1st |
* Special recognition for team hashcat goes to:
@ -34,8 +35,8 @@ abaco alotdv atom blandyuk blaz
BlowCane chancas Chick3nman coolbry95 dakykilla
deutsch dropdead epixoip EvilMog franky
gpufreak hashtka Hydraze J0hnnyBrav0 K9
kontrast23 legion m3g9tr0n matrix minga
N|IGHT5 NSAKEY NullMode philsmd purehate
radix Rolf rurapenthe s3in!c SuperJames
Szul tehnlulz The_Mechanic T0XlC TychoTithonus
undeath unix-ninja Xanadrel xmisery
kontrast23 Kryczek legion m3g9tr0n matrix
minga N|IGHT5 NSAKEY NullMode philsmd
purehate radix Rolf rurapenthe s3in!c
SuperJames Szul tehnlulz The_Mechanic T0XlC
TychoTithonus undeath unix-ninja Xanadrel xmisery

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save