1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-06-28 02:42:36 +00:00

Converted _a3 kernels, use SIMD for CPU and GPU

This commit is contained in:
Jens Steube 2016-01-23 15:32:31 +01:00
parent 4c0e520fd8
commit 1d3795a3ab
222 changed files with 12369 additions and 13551 deletions

View File

@ -7,7 +7,7 @@
#include "include/kernel_vendor.h"
#include "OpenCL/types_ocl.c"
static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
static void switch_buffer_by_offset_le (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -789,12 +789,12 @@ __kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global kernel_r
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, pw_r_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32 w0[4];

File diff suppressed because it is too large. Load Diff

View File

@ -70,7 +70,7 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -113,7 +113,7 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -286,7 +286,7 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -341,7 +341,7 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -23,7 +23,7 @@
#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= f (b, c, d); \
a -= x; \
a -= t; \
@ -32,7 +32,7 @@
#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= x; \
a -= t; \
}
@ -306,32 +306,32 @@ static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
u32x a_rev = digests_buf[digests_offset].digest_buf[0];
u32x b_rev = digests_buf[digests_offset].digest_buf[1];
u32x c_rev = digests_buf[digests_offset].digest_buf[2];
u32x d_rev = digests_buf[digests_offset].digest_buf[3];
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
const u32x pre_cd = c_rev ^ d_rev;
const u32 pre_cd = c_rev ^ d_rev;
MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
MD5_STEP_REV1(MD5_H_S, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H_S, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
/**
* loop

View File

@ -142,7 +142,7 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 pw_salt_len = out_len + salt_len;
@ -397,7 +397,7 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 pw_salt_len = out_len + salt_len;

View File

@ -68,7 +68,7 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -131,7 +131,7 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -166,7 +166,7 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;
@ -345,7 +345,7 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -420,7 +420,7 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -455,7 +455,7 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;

View File

@ -23,7 +23,7 @@
#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= f (b, c, d); \
a -= x; \
a -= t; \
@ -32,7 +32,7 @@
#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= x; \
a -= t; \
}
@ -78,7 +78,7 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= salt_buf0[0];
w[ 1] |= salt_buf0[1];
@ -363,32 +363,32 @@ static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
u32x a_rev = digests_buf[digests_offset].digest_buf[0];
u32x b_rev = digests_buf[digests_offset].digest_buf[1];
u32x c_rev = digests_buf[digests_offset].digest_buf[2];
u32x d_rev = digests_buf[digests_offset].digest_buf[3];
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
const u32x pre_cd = c_rev ^ d_rev;
const u32 pre_cd = c_rev ^ d_rev;
MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
MD5_STEP_REV1(MD5_H_S, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H_S, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
/**
* loop

View File

@ -132,7 +132,7 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -366,7 +366,7 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -68,7 +68,7 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -125,7 +125,7 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -178,7 +178,7 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -338,7 +338,7 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -407,7 +407,7 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -460,7 +460,7 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] = pw_salt_len * 8;

View File

@ -65,36 +65,16 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
#if VECT_SIZE == 1
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i);
#elif VECT_SIZE == 2
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
#elif VECT_SIZE == 4
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
#elif VECT_SIZE == 8
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
#endif
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = w0lr;
w0_t[0] = w0[0];
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -111,9 +91,7 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] = pw_salt_len * 8;
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -132,6 +110,61 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = pw_salt_len * 8;
w3_t[3] = 0;
/**
* md5
*/
@ -272,36 +305,16 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
#if VECT_SIZE == 1
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i);
#elif VECT_SIZE == 2
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
#elif VECT_SIZE == 4
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
#elif VECT_SIZE == 8
const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
#endif
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = w0lr;
w0_t[0] = w0[0];
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -318,9 +331,7 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] = pw_salt_len * 8;
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -339,6 +350,61 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = pw_salt_len * 8;
w3_t[3] = 0;
/**
* md5
*/

View File

@ -144,7 +144,7 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;
@ -404,7 +404,7 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;

View File

@ -70,7 +70,7 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -133,7 +133,7 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -168,7 +168,7 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;
@ -366,7 +366,7 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -441,7 +441,7 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -476,7 +476,7 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;

View File

@ -23,7 +23,7 @@
#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= f (b, c, d); \
a -= x; \
a -= t; \
@ -32,7 +32,7 @@
#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \
{ \
a -= b; \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= x; \
a -= t; \
}
@ -78,7 +78,7 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= salt_buf0[0];
w[ 1] |= salt_buf0[1];
@ -363,32 +363,32 @@ static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
u32x a_rev = digests_buf[digests_offset].digest_buf[0];
u32x b_rev = digests_buf[digests_offset].digest_buf[1];
u32x c_rev = digests_buf[digests_offset].digest_buf[2];
u32x d_rev = digests_buf[digests_offset].digest_buf[3];
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30);
MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33);
MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32);
MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31);
MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30);
const u32x pre_cd = c_rev ^ d_rev;
const u32 pre_cd = c_rev ^ d_rev;
MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
MD5_STEP_REV1(MD5_H_S, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23);
MD5_STEP_REV1(MD5_H_S, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22);
/**
* loop

View File

@ -124,7 +124,7 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -143,6 +143,8 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* md5
*/
u32 tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
@ -182,22 +184,22 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
@ -348,7 +350,7 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -367,6 +369,8 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* md5
*/
u32 tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
@ -406,22 +410,22 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);

View File

@ -70,7 +70,7 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -133,7 +133,7 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -172,7 +172,7 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -191,6 +191,8 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* md5
*/
u32 tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
@ -230,22 +232,22 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
@ -330,7 +332,7 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -405,7 +407,7 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -444,7 +446,7 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -463,6 +465,8 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* md5
*/
u32 tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
@ -502,22 +506,22 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
@ -65,18 +65,6 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
*/
@ -103,9 +91,7 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] = pw_salt_len * 8;
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -124,14 +110,71 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = pw_salt_len * 8;
w3_t[3] = 0;
/**
* md5
*/
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x tmp2;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
@ -167,22 +210,22 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
@ -201,13 +244,7 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
@ -268,18 +305,6 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
*/
@ -306,9 +331,7 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
w3_t[2] = pw_salt_len * 8;
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -327,14 +350,71 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = pw_salt_len * 8;
w3_t[3] = 0;
/**
* md5
*/
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x tmp2;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
@ -370,22 +450,22 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
@ -401,21 +481,13 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
bool q_cond = allx (search[0] != a);
if (q_cond) continue;
if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}

View File

@ -253,7 +253,7 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -330,7 +330,7 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -484,7 +484,7 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -573,7 +573,7 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,35 +18,33 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
u32 tmp2;
u32x tmp2;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -120,7 +120,7 @@ static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], co
digest[3] += d;
}
static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4])
static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -171,7 +171,7 @@ static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4
md5_transform (w0, w1, w2, w3, opad);
}
static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4])
static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -254,46 +254,46 @@ static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[4];
u32 opad[4];
u32x ipad[4];
u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -314,16 +314,11 @@ static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = (64 + salt_len) * 8;
w3_t[3] = 0;
u32 digest[4];
u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[0];
const u32 r1 = digest[3];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_M
COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]);
}
}
@ -388,46 +383,46 @@ static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[4];
u32 opad[4];
u32x ipad[4];
u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -448,16 +443,11 @@ static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = (64 + salt_len) * 8;
w3_t[3] = 0;
u32 digest[4];
u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[0];
const u32 r1 = digest[3];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_S
COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]);
}
}

View File

@ -253,7 +253,7 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -351,7 +351,7 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -470,7 +470,7 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -580,7 +580,7 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,35 +18,33 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
u32 tmp2;
u32x tmp2;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -120,7 +120,7 @@ static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], co
digest[3] += d;
}
static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4])
static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -171,7 +171,7 @@ static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4
md5_transform (w0, w1, w2, w3, opad);
}
static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4])
static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -236,36 +236,36 @@ static void m00060m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = salt_buf0[0];
w0_t[1] = salt_buf0[1];
w0_t[2] = salt_buf0[2];
w0_t[3] = salt_buf0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = salt_buf1[0];
w1_t[1] = salt_buf1[1];
w1_t[2] = salt_buf1[2];
w1_t[3] = salt_buf1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[4];
u32 opad[4];
u32x ipad[4];
u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -275,15 +275,13 @@ static void m00060m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0rl = w0r | w0l;
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
w0_t[0] = w0[0];
w0_t[0] = w0rl;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -300,16 +298,13 @@ static void m00060m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = (64 + pw_len) * 8;
w3_t[3] = 0;
u32 digest[4];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_len);
u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[0];
const u32 r1 = digest[3];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_M
COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]);
}
}
@ -344,36 +339,36 @@ static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = salt_buf0[0];
w0_t[1] = salt_buf0[1];
w0_t[2] = salt_buf0[2];
w0_t[3] = salt_buf0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = salt_buf1[0];
w1_t[1] = salt_buf1[1];
w1_t[2] = salt_buf1[2];
w1_t[3] = salt_buf1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[4];
u32 opad[4];
u32x ipad[4];
u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -395,15 +390,13 @@ static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0rl = w0r | w0l;
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
w0_t[0] = w0[0];
w0_t[0] = w0rl;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -420,16 +413,13 @@ static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = (64 + pw_len) * 8;
w3_t[3] = 0;
u32 digest[4];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_len);
u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[0];
const u32 r1 = digest[3];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_S
COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]);
}
}

View File

@ -70,7 +70,7 @@ __kernel void m00100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -115,7 +115,7 @@ __kernel void m00100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -337,7 +337,7 @@ __kernel void m00100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -400,7 +400,7 @@ __kernel void m00100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -33,66 +33,66 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -116,43 +116,43 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -247,30 +247,24 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16));
SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -283,66 +277,66 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -376,7 +370,7 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03;
const u32 e_rev = rotl32_S (search[1], 2u) - SHA1C03;
/**
* loop
@ -384,43 +378,43 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -516,36 +510,28 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
bool q_cond = allx (e_rev != e);
if (q_cond) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}
__kernel void m00100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -583,7 +569,7 @@ __kernel void m00100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -621,7 +607,7 @@ __kernel void m00100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -659,7 +645,7 @@ __kernel void m00100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -697,7 +683,7 @@ __kernel void m00100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -735,7 +721,7 @@ __kernel void m00100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -142,7 +142,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 pw_salt_len = out_len + salt_len;
@ -447,7 +447,7 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 pw_salt_len = out_len + salt_len;

View File

@ -68,7 +68,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -131,7 +131,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -166,7 +166,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;
@ -389,7 +389,7 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -470,7 +470,7 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -505,7 +505,7 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -61,24 +61,24 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -90,70 +90,70 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32_S ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32_S ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32_S ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32_S ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -177,45 +177,45 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -314,17 +314,11 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -337,66 +331,66 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -430,7 +424,7 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03;
const u32 e_rev = rotl32_S (search[1], 2u) - SHA1C03;
/**
* loop
@ -438,43 +432,43 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -570,36 +564,28 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
bool q_cond = allx (e_rev != e);
if (q_cond) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}
__kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -637,7 +623,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00110_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -675,7 +661,7 @@ __kernel void m00110_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00110_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -713,7 +699,7 @@ __kernel void m00110_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -751,7 +737,7 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00110_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -789,7 +775,7 @@ __kernel void m00110_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00110_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00110_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -132,7 +132,7 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -416,7 +416,7 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -68,7 +68,7 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -125,7 +125,7 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -178,7 +178,7 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -382,7 +382,7 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -457,7 +457,7 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -510,7 +510,7 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,104 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
/**
 * Store the 32-bit value w0 into the word buffer sw at an offset given in
 * 8-bit units, where the first unit of every word is its most significant
 * byte.
 *
 * sw        word buffer; sw[salt_len / 4] is modified and, when the offset
 *           is not a multiple of four, sw[salt_len / 4 + 1] as well
 * w0        value to store
 * salt_len  offset of the store; only 0..31 is handled, any larger value
 *           leaves sw untouched
 */
static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len)
{
  // offsets outside the supported range are silently ignored
  if (salt_len > 31) return;

  const u32 idx   = salt_len / 4;       // first word touched
  const u32 shift = (salt_len & 3) * 8; // bit displacement inside that word

  if (shift == 0)
  {
    // word-aligned offset: w0 replaces exactly one word
    sw[idx] = w0;

    return;
  }

  // bits of sw[idx] that receive the upper part of w0; the complementary
  // bits of sw[idx + 1] receive the remainder
  const u32 lo_mask = 0xffffffff >> shift;

  sw[idx + 0] = (sw[idx + 0] & ~lo_mask) | (w0 >> shift);
  sw[idx + 1] = (sw[idx + 1] &  lo_mask) | (w0 << (32 - shift));
}
#include "OpenCL/simd.c"
static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
@ -169,24 +74,24 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -205,22 +110,22 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = swap32 (w3_t[2]);
w3_t[3] = swap32 (w3_t[3]);
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
@ -228,13 +133,13 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32 w0n = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 wx[16];
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
@ -253,12 +158,12 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at (wx, w0n, salt_len);
overwrite_at_be (wx, w0lr, salt_len);
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
@ -281,11 +186,11 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha1
*/
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -383,12 +288,7 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
@ -417,7 +317,7 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u);
const u32 e_rev = rotl32_S (search[1], 2u);
/**
* salt
@ -464,24 +364,24 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -500,22 +400,22 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = swap32 (w3_t[2]);
w3_t[3] = swap32 (w3_t[3]);
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
@ -523,13 +423,13 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32 w0n = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 wx[16];
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
@ -548,12 +448,12 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at (wx, w0n, salt_len);
overwrite_at_be (wx, w0lr, salt_len);
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
@ -576,11 +476,11 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha1
*/
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -675,19 +575,13 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
if (allx (e != e_rev)) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}

View File

@ -144,7 +144,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;
@ -456,7 +456,7 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;

View File

@ -70,7 +70,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -133,7 +133,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -168,7 +168,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;
@ -412,7 +412,7 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -493,7 +493,7 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -528,7 +528,7 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -61,24 +61,24 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -90,70 +90,70 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32_S ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32_S ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32_S ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32_S ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -177,45 +177,45 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -314,17 +314,11 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -337,66 +331,66 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -430,7 +424,7 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03;
const u32 e_rev = rotl32_S (search[1], 2u) - SHA1C03;
/**
* loop
@ -438,43 +432,43 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -570,36 +564,28 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
bool q_cond = allx (e_rev != e);
if (q_cond) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22));
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}
__kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -637,7 +623,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00130_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -675,7 +661,7 @@ __kernel void m00130_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00130_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -713,7 +699,7 @@ __kernel void m00130_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -751,7 +737,7 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00130_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -789,7 +775,7 @@ __kernel void m00130_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00130_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00130_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -126,7 +126,7 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -402,7 +402,7 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -70,7 +70,7 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -133,7 +133,7 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -172,7 +172,7 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -376,7 +376,7 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -457,7 +457,7 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -496,7 +496,7 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,104 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
/**
 * Overwrite four consecutive bytes of the word buffer with w0.
 *
 * The buffer is addressed as a byte stream in which the most significant
 * byte of each 32-bit word comes first. salt_len is the byte offset at which
 * the most significant byte of w0 is placed:
 *
 *  - offset is a multiple of 4: the whole word sw[offset / 4] is replaced
 *  - otherwise: w0 is split across sw[offset / 4] and the following word,
 *    and the bytes outside the four-byte window are preserved
 *
 * Only offsets 0..31 are handled (touching sw[0]..sw[8] at most); any other
 * offset leaves the buffer unchanged.
 *
 * @param sw        word buffer, modified in place
 * @param w0        the 32-bit value to insert
 * @param salt_len  byte offset of the insertion point
 */
static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len)
{
  // Unaligned store: keep the leading bytes of word i and the trailing bytes
  // of word i + 1, fill the gap with w0 shifted by `bits` (8, 16 or 24).
  #define OVERWRITE_SPLIT(i,bits)                                                 \
    sw[(i) + 0] = (sw[(i) + 0] & (0xffffffff << (32 - (bits)))) | (w0 >> (bits)); \
    sw[(i) + 1] = (sw[(i) + 1] & (0xffffffff >> (bits))) | (w0 << (32 - (bits)))

  // The four byte offsets that start inside word i. Every array index stays
  // a compile-time constant, exactly as in a hand-written switch.
  #define OVERWRITE_WORD(i)                              \
    case 4 * (i) + 0: sw[(i)] = w0;               break; \
    case 4 * (i) + 1: OVERWRITE_SPLIT ((i),  8);  break; \
    case 4 * (i) + 2: OVERWRITE_SPLIT ((i), 16);  break; \
    case 4 * (i) + 3: OVERWRITE_SPLIT ((i), 24);  break

  switch (salt_len)
  {
    OVERWRITE_WORD (0);
    OVERWRITE_WORD (1);
    OVERWRITE_WORD (2);
    OVERWRITE_WORD (3);
    OVERWRITE_WORD (4);
    OVERWRITE_WORD (5);
    OVERWRITE_WORD (6);
    OVERWRITE_WORD (7);
  }

  #undef OVERWRITE_WORD
  #undef OVERWRITE_SPLIT
}
#include "OpenCL/simd.c"
static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
@ -169,24 +74,24 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -205,22 +110,22 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = swap32 (w3_t[2]);
w3_t[3] = swap32 (w3_t[3]);
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
@ -228,13 +133,13 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32 w0n = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 wx[16];
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
@ -253,12 +158,12 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at (wx, w0n, salt_len);
overwrite_at_be (wx, w0lr, salt_len);
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
@ -281,11 +186,11 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha1
*/
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -383,12 +288,7 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
@ -417,7 +317,7 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u);
const u32 e_rev = rotl32_S (search[1], 2u);
/**
* salt
@ -464,24 +364,24 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -500,22 +400,22 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
w3_t[2] = swap32 (w3_t[2]);
w3_t[3] = swap32 (w3_t[3]);
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
@ -523,13 +423,13 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32 w0n = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 wx[16];
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
@ -548,12 +448,12 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at (wx, w0n, salt_len);
overwrite_at_be (wx, w0lr, salt_len);
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
@ -576,11 +476,11 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha1
*/
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -675,19 +575,13 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
if (allx (e != e_rev)) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}

View File

@ -285,7 +285,7 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -348,7 +348,7 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -502,7 +502,7 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -577,7 +577,7 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,34 +18,32 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
{
u32 A = digest[0];
u32 B = digest[1];
u32 C = digest[2];
u32 D = digest[3];
u32 E = digest[4];
u32x A = digest[0];
u32x B = digest[1];
u32x C = digest[2];
u32x D = digest[3];
u32x E = digest[4];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
#undef K
#define K SHA1C00
@ -148,7 +148,7 @@ static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], c
digest[4] += E;
}
static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5])
static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -201,7 +201,7 @@ static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[
sha1_transform (w0, w1, w2, w3, opad);
}
static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5])
static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -272,46 +272,46 @@ static void m00150m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[5];
u32 opad[5];
u32x ipad[5];
u32x opad[5];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -332,16 +332,11 @@ static void m00150m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + salt_len) * 8;
u32 digest[5];
u32x digest[5];
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[4];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_M
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
}
}
@ -392,46 +387,46 @@ static void m00150s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[5];
u32 opad[5];
u32x ipad[5];
u32x opad[5];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -452,16 +447,11 @@ static void m00150s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + salt_len) * 8;
u32 digest[5];
u32x digest[5];
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[4];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_S
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
}
}

View File

@ -285,7 +285,7 @@ __kernel void m00160_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -383,7 +383,7 @@ __kernel void m00160_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -502,7 +502,7 @@ __kernel void m00160_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -612,7 +612,7 @@ __kernel void m00160_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,34 +18,32 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5])
{
u32 A = digest[0];
u32 B = digest[1];
u32 C = digest[2];
u32 D = digest[3];
u32 E = digest[4];
u32x A = digest[0];
u32x B = digest[1];
u32x C = digest[2];
u32x D = digest[3];
u32x E = digest[4];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
#undef K
#define K SHA1C00
@ -148,7 +148,7 @@ static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], c
digest[4] += E;
}
static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5])
static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -201,7 +201,7 @@ static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[
sha1_transform (w0, w1, w2, w3, opad);
}
static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5])
static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -268,36 +268,36 @@ static void m00160m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[5];
u32 opad[5];
u32x ipad[5];
u32x opad[5];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -307,13 +307,13 @@ static void m00160m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -330,16 +330,11 @@ static void m00160m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 digest[5];
u32x digest[5];
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[4];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_M
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
}
}
@ -374,36 +369,36 @@ static void m00160s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[5];
u32 opad[5];
u32x ipad[5];
u32x opad[5];
hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -425,13 +420,13 @@ static void m00160s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -448,16 +443,11 @@ static void m00160s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 digest[5];
u32x digest[5];
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[4];
const u32 r2 = digest[2];
const u32 r3 = digest[1];
#include COMPARE_S
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
}
}

View File

@ -70,7 +70,7 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -115,7 +115,7 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -354,7 +354,7 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -411,7 +411,7 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -33,66 +33,66 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -116,43 +116,43 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -247,13 +247,13 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16));
SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
@ -265,29 +265,15 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
d += SHA1M_D;
c += SHA1M_C;
{
const u32 r0 = a;
const u32 r1 = e;
const u32 r2 = d;
const u32 r3 = c;
#include COMPARE_M
}
COMPARE_M_SIMD (a, e, d, c);
a &= 0x00000fff;
{
const u32 r0 = a;
const u32 r1 = e;
const u32 r2 = d;
const u32 r3 = c;
#include COMPARE_M
}
COMPARE_M_SIMD (a, e, d, c);
}
}
static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -300,66 +286,66 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -395,43 +381,43 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -524,16 +510,15 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18));
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20));
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16));
SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
SHA1_STEP (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
@ -545,29 +530,15 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
d += SHA1M_D;
c += SHA1M_C;
{
const u32 r0 = a;
const u32 r1 = e;
const u32 r2 = d;
const u32 r3 = c;
#include COMPARE_S
}
COMPARE_S_SIMD (a, e, d, c);
a &= 0x00000fff;
{
const u32 r0 = a;
const u32 r1 = e;
const u32 r2 = d;
const u32 r3 = c;
#include COMPARE_S
}
COMPARE_S_SIMD (a, e, d, c);
}
}
__kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -605,7 +576,7 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00190_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -643,7 +614,7 @@ __kernel void m00190_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00190_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -681,7 +652,7 @@ __kernel void m00190_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -719,7 +690,7 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00190_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -757,7 +728,7 @@ __kernel void m00190_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00190_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00190_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -68,7 +68,7 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -111,7 +111,7 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w_t[16];
@ -246,7 +246,7 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -301,7 +301,7 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w_t[16];

View File

@ -5,6 +5,8 @@
#define _MYSQL323_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -35,16 +35,18 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 a = MYSQL323_A;
u32 b = MYSQL323_B;
u32x a = MYSQL323_A;
u32x b = MYSQL323_B;
u32x c = 0;
u32x d = 0;
u32 add = 7;
u32x add = 7;
#define ROUND(v) \
{ \
@ -81,7 +83,7 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
{
const u32 wj = w[j];
const u32x wj = w[j];
ROUND ((wj >> 0) & 0xff);
ROUND ((wj >> 8) & 0xff);
@ -89,7 +91,7 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
ROUND ((wj >> 24) & 0xff);
}
const u32 wj = w[j];
const u32x wj = w[j];
const u32 left = pw_len - i;
@ -112,16 +114,11 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
a &= 0x7fffffff;
b &= 0x7fffffff;
const u32 r0 = a;
const u32 r1 = b;
const u32 r2 = 0;
const u32 r3 = 0;
#include COMPARE_M
COMPARE_M_SIMD (a, b, c, d);
}
}
static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -148,16 +145,18 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 a = MYSQL323_A;
u32 b = MYSQL323_B;
u32x a = MYSQL323_A;
u32x b = MYSQL323_B;
u32x c = 0;
u32x d = 0;
u32 add = 7;
u32x add = 7;
#define ROUND(v) \
{ \
@ -194,7 +193,7 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1)
{
const u32 wj = w[j];
const u32x wj = w[j];
ROUND ((wj >> 0) & 0xff);
ROUND ((wj >> 8) & 0xff);
@ -202,7 +201,7 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
ROUND ((wj >> 24) & 0xff);
}
const u32 wj = w[j];
const u32x wj = w[j];
const u32 left = pw_len - i;
@ -225,16 +224,11 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
a &= 0x7fffffff;
b &= 0x7fffffff;
const u32 r0 = a;
const u32 r1 = b;
const u32 r2 = 0;
const u32 r3 = 0;
#include COMPARE_S
COMPARE_S_SIMD (a, b, c, d);
}
}
__kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -272,7 +266,7 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -310,7 +304,7 @@ __kernel void m00200_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -348,7 +342,7 @@ __kernel void m00200_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -386,7 +380,7 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -424,7 +418,7 @@ __kernel void m00200_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00200_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -70,7 +70,7 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -115,7 +115,7 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -462,7 +462,7 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -525,7 +525,7 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA1_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -33,70 +33,70 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_76s = rotl32_S ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32_S ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32_S ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32_S ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -136,45 +136,45 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -279,22 +279,22 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
d += SHA1M_D;
e += SHA1M_E;
u32 w0_t = a;
u32 w1_t = b;
u32 w2_t = c;
u32 w3_t = d;
u32 w4_t = e;
u32 w5_t = 0x80000000;
u32 w6_t = 0;
u32 w7_t = 0;
u32 w8_t = 0;
u32 w9_t = 0;
u32 wa_t = 0;
u32 wb_t = 0;
u32 wc_t = 0;
u32 wd_t = 0;
u32 we_t = 0;
u32 wf_t = 20 * 8;
u32x w0_t = a;
u32x w1_t = b;
u32x w2_t = c;
u32x w3_t = d;
u32x w4_t = e;
u32x w5_t = 0x80000000;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 20 * 8;
a = SHA1M_A;
b = SHA1M_B;
@ -398,17 +398,11 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (d, e, c, b);
}
}
static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -421,66 +415,66 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* base
*/
const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_16s = rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u);
const u32 c_17s = rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u);
const u32 c_18s = rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u);
const u32 c_19s = rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u);
const u32 c_20s = rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u);
const u32 c_21s = rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u);
const u32 c_22s = rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u);
const u32 c_23s = rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u);
const u32 c_24s = rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u);
const u32 c_25s = rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u);
const u32 c_26s = rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u);
const u32 c_27s = rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u);
const u32 c_28s = rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u);
const u32 c_29s = rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u);
const u32 c_30s = rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u);
const u32 c_31s = rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u);
const u32 c_32s = rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u);
const u32 c_33s = rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u);
const u32 c_34s = rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u);
const u32 c_35s = rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u);
const u32 c_36s = rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u);
const u32 c_37s = rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u);
const u32 c_38s = rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u);
const u32 c_39s = rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u);
const u32 c_40s = rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u);
const u32 c_41s = rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u);
const u32 c_42s = rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u);
const u32 c_43s = rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u);
const u32 c_44s = rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u);
const u32 c_45s = rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u);
const u32 c_46s = rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u);
const u32 c_47s = rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u);
const u32 c_48s = rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u);
const u32 c_49s = rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u);
const u32 c_50s = rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u);
const u32 c_51s = rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u);
const u32 c_52s = rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u);
const u32 c_53s = rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u);
const u32 c_54s = rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u);
const u32 c_55s = rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u);
const u32 c_56s = rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u);
const u32 c_57s = rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u);
const u32 c_58s = rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u);
const u32 c_59s = rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u);
const u32 c_60s = rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u);
const u32 c_61s = rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u);
const u32 c_62s = rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u);
const u32 c_63s = rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u);
const u32 c_64s = rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u);
const u32 c_65s = rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u);
const u32 c_66s = rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u);
const u32 c_67s = rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u);
const u32 c_68s = rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u);
const u32 c_69s = rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u);
const u32 c_70s = rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u);
const u32 c_71s = rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u);
const u32 c_72s = rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u);
const u32 c_73s = rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u);
const u32 c_74s = rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u);
const u32 c_75s = rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u);
const u32 c_17sK = c_17s + SHA1C00;
const u32 c_18sK = c_18s + SHA1C00;
@ -530,7 +524,7 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
const u32 e_rev = rotl32 (search[1], 2u);
const u32 e_rev = rotl32_S (search[1], 2u);
/**
* loop
@ -538,43 +532,43 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
const u32 w0s01 = rotl32 (w0, 1u);
const u32 w0s02 = rotl32 (w0, 2u);
const u32 w0s03 = rotl32 (w0, 3u);
const u32 w0s04 = rotl32 (w0, 4u);
const u32 w0s05 = rotl32 (w0, 5u);
const u32 w0s06 = rotl32 (w0, 6u);
const u32 w0s07 = rotl32 (w0, 7u);
const u32 w0s08 = rotl32 (w0, 8u);
const u32 w0s09 = rotl32 (w0, 9u);
const u32 w0s10 = rotl32 (w0, 10u);
const u32 w0s11 = rotl32 (w0, 11u);
const u32 w0s12 = rotl32 (w0, 12u);
const u32 w0s13 = rotl32 (w0, 13u);
const u32 w0s14 = rotl32 (w0, 14u);
const u32 w0s15 = rotl32 (w0, 15u);
const u32 w0s16 = rotl32 (w0, 16u);
const u32 w0s17 = rotl32 (w0, 17u);
const u32 w0s18 = rotl32 (w0, 18u);
const u32 w0s19 = rotl32 (w0, 19u);
const u32 w0s20 = rotl32 (w0, 20u);
const u32x w0s01 = rotl32 (w0, 1u);
const u32x w0s02 = rotl32 (w0, 2u);
const u32x w0s03 = rotl32 (w0, 3u);
const u32x w0s04 = rotl32 (w0, 4u);
const u32x w0s05 = rotl32 (w0, 5u);
const u32x w0s06 = rotl32 (w0, 6u);
const u32x w0s07 = rotl32 (w0, 7u);
const u32x w0s08 = rotl32 (w0, 8u);
const u32x w0s09 = rotl32 (w0, 9u);
const u32x w0s10 = rotl32 (w0, 10u);
const u32x w0s11 = rotl32 (w0, 11u);
const u32x w0s12 = rotl32 (w0, 12u);
const u32x w0s13 = rotl32 (w0, 13u);
const u32x w0s14 = rotl32 (w0, 14u);
const u32x w0s15 = rotl32 (w0, 15u);
const u32x w0s16 = rotl32 (w0, 16u);
const u32x w0s17 = rotl32 (w0, 17u);
const u32x w0s18 = rotl32 (w0, 18u);
const u32x w0s19 = rotl32 (w0, 19u);
const u32x w0s20 = rotl32 (w0, 20u);
const u32 w0s04___w0s06 = w0s04 ^ w0s06;
const u32 w0s04___w0s08 = w0s04 ^ w0s08;
const u32 w0s08___w0s12 = w0s08 ^ w0s12;
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
const u32x w0s04___w0s06 = w0s04 ^ w0s06;
const u32x w0s04___w0s08 = w0s04 ^ w0s08;
const u32x w0s08___w0s12 = w0s08 ^ w0s12;
const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07;
u32 a = SHA1M_A;
u32 b = SHA1M_B;
u32 c = SHA1M_C;
u32 d = SHA1M_D;
u32 e = SHA1M_E;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
@ -669,13 +663,13 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16));
SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14));
const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32x c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u);
const u32x c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u);
const u32x c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u);
const u32x c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u);
const u32 w0s21 = rotl32 (w0, 21u);
const u32 w0s22 = rotl32 (w0, 22U);
const u32x w0s21 = rotl32 (w0, 21u);
const u32x w0s22 = rotl32 (w0, 22U);
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21));
SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s));
@ -688,22 +682,22 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
d += SHA1M_D;
e += SHA1M_E;
u32 w0_t = a;
u32 w1_t = b;
u32 w2_t = c;
u32 w3_t = d;
u32 w4_t = e;
u32 w5_t = 0x80000000;
u32 w6_t = 0;
u32 w7_t = 0;
u32 w8_t = 0;
u32 w9_t = 0;
u32 wa_t = 0;
u32 wb_t = 0;
u32 wc_t = 0;
u32 wd_t = 0;
u32 we_t = 0;
u32 wf_t = 20 * 8;
u32x w0_t = a;
u32x w1_t = b;
u32x w2_t = c;
u32x w3_t = d;
u32x w4_t = e;
u32x w5_t = 0x80000000;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = 20 * 8;
a = SHA1M_A;
b = SHA1M_B;
@ -803,26 +797,18 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
bool q_cond = allx (e_rev != e);
if (q_cond) continue;
if (MATCHES_NONE_VS (e, e_rev)) continue;
wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
const u32 r0 = d;
const u32 r1 = e;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (d, e, c, b);
}
}
__kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -860,7 +846,7 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00300_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -898,7 +884,7 @@ __kernel void m00300_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00300_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -936,7 +922,7 @@ __kernel void m00300_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -974,7 +960,7 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00300_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -1012,7 +998,7 @@ __kernel void m00300_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00300_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00300_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -70,7 +70,7 @@ __kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -113,7 +113,7 @@ __kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -267,7 +267,7 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -322,7 +322,7 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD4_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,13 +18,11 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
#define MD4_STEP_REV(f,a,b,c,d,x,t,s) \
{ \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= f (b, c, d); \
a -= x; \
a -= t; \
@ -30,12 +30,12 @@
#define MD4_STEP_REV1(f,a,b,c,d,x,t,s) \
{ \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= x; \
a -= t; \
}
static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -105,16 +105,16 @@ static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 a = MD4M_A;
u32 b = MD4M_B;
u32 c = MD4M_C;
u32 d = MD4M_D;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
u32x d = MD4M_D;
MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00);
MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01);
@ -167,16 +167,11 @@ static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -261,28 +256,28 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20);
const u32 sav_c = c_rev;
const u32 sav_d = d_rev;
MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13);
MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12);
MD4_STEP_REV1(MD4_G_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13);
MD4_STEP_REV1(MD4_G_S, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12);
/**
* loop
@ -290,24 +285,24 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 pre_a = a_rev;
u32 pre_b = b_rev;
u32 pre_c = c_rev;
u32x pre_a = a_rev;
u32x pre_b = b_rev;
u32x pre_c = c_rev;
pre_a = pre_a - w0;
pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a);
pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b);
u32 a = MD4M_A;
u32 b = MD4M_B;
u32 c = MD4M_C;
u32 d = MD4M_D;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
u32x d = MD4M_D;
MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00);
MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01);
@ -338,16 +333,12 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11);
MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12);
bool q_cond = allx (pre_c != c);
if (q_cond) continue;
if (MATCHES_NONE_VV (c, pre_c)) continue;
MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13);
MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10);
bool q_cond2 = allx (pre_a != a);
if (q_cond2) continue;
if (MATCHES_NONE_VV (a, pre_a)) continue;
MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11);
MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12);
@ -370,16 +361,11 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}
__kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -417,7 +403,7 @@ __kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -455,7 +441,7 @@ __kernel void m00900_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -493,7 +479,7 @@ __kernel void m00900_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -531,7 +517,7 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -569,7 +555,7 @@ __kernel void m00900_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m00900_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m00900_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -70,7 +70,7 @@ __kernel void m01000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -113,7 +113,7 @@ __kernel void m01000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -279,7 +279,7 @@ __kernel void m01000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -334,7 +334,7 @@ __kernel void m01000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -22,7 +22,7 @@
#define MD4_STEP_REV(f,a,b,c,d,x,t,s) \
{ \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= f (b, c, d); \
a -= x; \
a -= t; \
@ -30,7 +30,7 @@
#define MD4_STEP_REV1(f,a,b,c,d,x,t,s) \
{ \
a = rotr32 (a, s); \
a = rotr32_S (a, s); \
a -= x; \
a -= t; \
}
@ -111,8 +111,6 @@ static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
const u32x w0 = w0l | w0r;
u32x tmp2;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
@ -152,22 +150,22 @@ static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12);
MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13);
MD4_STEP (MD4_H1, a, b, c, d, w0, H_w0c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_w8c02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w4c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wcc02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w2c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_wac02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w6c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wec02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w1c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_w9c02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w5c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wdc02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w3c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_wbc02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23);
MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wdc02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23);
COMPARE_M_SIMD (a, d, c, b);
}
@ -253,33 +251,33 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
* reverse
*/
u32x a_rev = digests_buf[digests_offset].digest_buf[0];
u32x b_rev = digests_buf[digests_offset].digest_buf[1];
u32x c_rev = digests_buf[digests_offset].digest_buf[2];
u32x d_rev = digests_buf[digests_offset].digest_buf[3];
u32 a_rev = digests_buf[digests_offset].digest_buf[0];
u32 b_rev = digests_buf[digests_offset].digest_buf[1];
u32 c_rev = digests_buf[digests_offset].digest_buf[2];
u32 d_rev = digests_buf[digests_offset].digest_buf[3];
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20);
MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23);
MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22);
MD4_STEP_REV (MD4_H_S, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21);
MD4_STEP_REV (MD4_H_S, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20);
const u32x sav_c = c_rev;
const u32x sav_d = d_rev;
const u32 sav_c = c_rev;
const u32 sav_d = d_rev;
MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13);
MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12);
MD4_STEP_REV1(MD4_G_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13);
MD4_STEP_REV1(MD4_G_S, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12);
/**
* loop
@ -301,8 +299,6 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a);
pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b);
u32x tmp2;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
@ -337,33 +333,33 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11);
MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12);
if (MATCHES_NONE_VV (pre_c, c)) continue;
if (MATCHES_NONE_VV (c, pre_c)) continue;
MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13);
MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10);
if (MATCHES_NONE_VV (pre_a, a)) continue;
if (MATCHES_NONE_VV (a, pre_a)) continue;
MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11);
MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12);
MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13);
MD4_STEP (MD4_H1, a, b, c, d, w0, H_w0c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_w8c02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w4c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wcc02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w2c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_wac02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w6c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wec02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w1c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_w9c02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w5c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wdc02, MD4S23);
MD4_STEP0(MD4_H1, a, b, c, d, H_w3c02, MD4S20);
MD4_STEP0(MD4_H2, d, a, b, c, H_wbc02, MD4S21);
MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23);
MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wdc02, MD4S23);
MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20);
MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21);
MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22);
MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23);
COMPARE_S_SIMD (a, d, c, b);
}

View File

@ -70,7 +70,7 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -140,7 +140,7 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -382,7 +382,7 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -464,7 +464,7 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD4_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -117,16 +117,16 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 a = MD4M_A;
u32 b = MD4M_B;
u32 c = MD4M_C;
u32 d = MD4M_D;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
u32x d = MD4M_D;
MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00);
MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01);
@ -184,10 +184,10 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c += MD4M_C;
d += MD4M_D;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = a;
w0_t[1] = b;
@ -262,16 +262,11 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -380,16 +375,16 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 a = MD4M_A;
u32 b = MD4M_B;
u32 c = MD4M_C;
u32 d = MD4M_D;
u32x a = MD4M_A;
u32x b = MD4M_B;
u32x c = MD4M_C;
u32x d = MD4M_D;
MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00);
MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01);
@ -447,10 +442,10 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c += MD4M_C;
d += MD4M_D;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = a;
w0_t[1] = b;
@ -522,24 +517,17 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
bool q_cond = allx (search[0] != a);
if (q_cond) continue;
if (MATCHES_NONE_VS (a, search[0])) continue;
MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}
__kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -577,7 +565,7 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -615,11 +603,11 @@ __kernel void m01100_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/* Empty body: launching this entry point performs no work and touches none of its arguments. */
/* NOTE(review): presumably kept only so an m01100_m16 symbol exists next to m01100_m08 with the same parameter list; confirm against the host-side kernel lookup before removing. */
}
__kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -657,7 +645,7 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -695,6 +683,6 @@ __kernel void m01100_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01100_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/* Empty body: launching this entry point performs no work and touches none of its arguments. */
/* NOTE(review): presumably kept only so an m01100_s16 symbol exists next to m01100_s04 / m01100_s08 with the same parameter list; confirm against the host-side kernel lookup before removing. */
}

View File

@ -70,7 +70,7 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -115,7 +115,7 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -306,7 +306,7 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -363,7 +363,7 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -35,37 +35,37 @@ static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -135,17 +135,11 @@ static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_M
COMPARE_M_SIMD (d, h, c, g);
}
}
static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -172,37 +166,37 @@ static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -268,20 +262,18 @@ static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
if (MATCHES_NONE_VS (d, search[0])) continue;
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_S
COMPARE_S_SIMD (d, h, c, g);
}
}
__kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -319,7 +311,7 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -357,7 +349,7 @@ __kernel void m01400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -395,7 +387,7 @@ __kernel void m01400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -433,7 +425,7 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -471,7 +463,7 @@ __kernel void m01400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -142,7 +142,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 out_salt_len = out_len + salt_len;
@ -416,7 +416,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 out_salt_len = out_len + salt_len;

View File

@ -68,7 +68,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -131,7 +131,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -166,7 +166,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;
@ -358,7 +358,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -433,7 +433,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -468,7 +468,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -61,24 +61,24 @@ static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -92,37 +92,37 @@ static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -192,17 +192,11 @@ static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_M
COMPARE_M_SIMD (d, h, c, g);
}
}
static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -229,37 +223,37 @@ static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -325,21 +319,18 @@ static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
if (MATCHES_NONE_VS (d, search[0])) continue;
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_S
COMPARE_S_SIMD (d, h, c, g);
}
}
__kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -377,7 +368,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01410_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -415,7 +406,7 @@ __kernel void m01410_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01410_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -453,7 +444,7 @@ __kernel void m01410_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -491,7 +482,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01410_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -529,7 +520,7 @@ __kernel void m01410_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01410_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01410_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -116,7 +116,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
@ -351,7 +351,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];

View File

@ -68,7 +68,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -125,7 +125,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -156,7 +156,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
@ -333,7 +333,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -402,7 +402,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -433,7 +433,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
static void m01420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
@ -65,92 +65,126 @@ static void m01420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* prepend salt
*/
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
u32 w0_t2[4];
u32 w1_t2[4];
u32 w2_t2[4];
u32 w3_t2[4];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
w0_t2[0] = swap32 (w0[0]);
w0_t2[1] = swap32 (w0[1]);
w0_t2[2] = swap32 (w0[2]);
w0_t2[3] = swap32 (w0[3]);
w1_t2[0] = swap32 (w1[0]);
w1_t2[1] = swap32 (w1[1]);
w1_t2[2] = swap32 (w1[2]);
w1_t2[3] = swap32 (w1[3]);
w2_t2[0] = swap32 (w2[0]);
w2_t2[1] = swap32 (w2[1]);
w2_t2[2] = swap32 (w2[2]);
w2_t2[3] = swap32 (w2[3]);
w3_t2[0] = swap32 (w3[0]);
w3_t2[1] = swap32 (w3[1]);
w3_t2[2] = swap32 (w3[2]);
w3_t2[3] = swap32 (w3[3]);
overwrite_at_be (wx, w0lr, salt_len);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
u32x w0_t = wx[ 0];
u32x w1_t = wx[ 1];
u32x w2_t = wx[ 2];
u32x w3_t = wx[ 3];
u32x w4_t = wx[ 4];
u32x w5_t = wx[ 5];
u32x w6_t = wx[ 6];
u32x w7_t = wx[ 7];
u32x w8_t = wx[ 8];
u32x w9_t = wx[ 9];
u32x wa_t = wx[10];
u32x wb_t = wx[11];
u32x wc_t = wx[12];
u32x wd_t = wx[13];
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
w0_t2[2] |= salt_buf0[2];
w0_t2[3] |= salt_buf0[3];
w1_t2[0] |= salt_buf1[0];
w1_t2[1] |= salt_buf1[1];
w1_t2[2] |= salt_buf1[2];
w1_t2[3] |= salt_buf1[3];
w2_t2[0] |= salt_buf2[0];
w2_t2[1] |= salt_buf2[1];
w2_t2[2] |= salt_buf2[2];
w2_t2[3] |= salt_buf2[3];
w3_t2[0] |= salt_buf3[0];
w3_t2[1] |= salt_buf3[1];
w3_t2[2] |= salt_buf3[2];
w3_t2[3] |= salt_buf3[3];
/**
* sha256
*/
u32 w0_t = swap32 (w0_t2[0]);
u32 w1_t = swap32 (w0_t2[1]);
u32 w2_t = swap32 (w0_t2[2]);
u32 w3_t = swap32 (w0_t2[3]);
u32 w4_t = swap32 (w1_t2[0]);
u32 w5_t = swap32 (w1_t2[1]);
u32 w6_t = swap32 (w1_t2[2]);
u32 w7_t = swap32 (w1_t2[3]);
u32 w8_t = swap32 (w2_t2[0]);
u32 w9_t = swap32 (w2_t2[1]);
u32 wa_t = swap32 (w2_t2[2]);
u32 wb_t = swap32 (w2_t2[3]);
u32 wc_t = swap32 (w3_t2[0]);
u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -220,13 +254,7 @@ static void m01420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_M
COMPARE_M_SIMD (d, h, c, g);
}
}
@ -287,92 +315,126 @@ static void m01420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* prepend salt
*/
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
u32 w0_t2[4];
u32 w1_t2[4];
u32 w2_t2[4];
u32 w3_t2[4];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
w0_t2[0] = swap32 (w0[0]);
w0_t2[1] = swap32 (w0[1]);
w0_t2[2] = swap32 (w0[2]);
w0_t2[3] = swap32 (w0[3]);
w1_t2[0] = swap32 (w1[0]);
w1_t2[1] = swap32 (w1[1]);
w1_t2[2] = swap32 (w1[2]);
w1_t2[3] = swap32 (w1[3]);
w2_t2[0] = swap32 (w2[0]);
w2_t2[1] = swap32 (w2[1]);
w2_t2[2] = swap32 (w2[2]);
w2_t2[3] = swap32 (w2[3]);
w3_t2[0] = swap32 (w3[0]);
w3_t2[1] = swap32 (w3[1]);
w3_t2[2] = swap32 (w3[2]);
w3_t2[3] = swap32 (w3[3]);
overwrite_at_be (wx, w0lr, salt_len);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
u32x w0_t = wx[ 0];
u32x w1_t = wx[ 1];
u32x w2_t = wx[ 2];
u32x w3_t = wx[ 3];
u32x w4_t = wx[ 4];
u32x w5_t = wx[ 5];
u32x w6_t = wx[ 6];
u32x w7_t = wx[ 7];
u32x w8_t = wx[ 8];
u32x w9_t = wx[ 9];
u32x wa_t = wx[10];
u32x wb_t = wx[11];
u32x wc_t = wx[12];
u32x wd_t = wx[13];
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
w0_t2[2] |= salt_buf0[2];
w0_t2[3] |= salt_buf0[3];
w1_t2[0] |= salt_buf1[0];
w1_t2[1] |= salt_buf1[1];
w1_t2[2] |= salt_buf1[2];
w1_t2[3] |= salt_buf1[3];
w2_t2[0] |= salt_buf2[0];
w2_t2[1] |= salt_buf2[1];
w2_t2[2] |= salt_buf2[2];
w2_t2[3] |= salt_buf2[3];
w3_t2[0] |= salt_buf3[0];
w3_t2[1] |= salt_buf3[1];
w3_t2[2] |= salt_buf3[2];
w3_t2[3] |= salt_buf3[3];
/**
* sha256
*/
u32 w0_t = swap32 (w0_t2[0]);
u32 w1_t = swap32 (w0_t2[1]);
u32 w2_t = swap32 (w0_t2[2]);
u32 w3_t = swap32 (w0_t2[3]);
u32 w4_t = swap32 (w1_t2[0]);
u32 w5_t = swap32 (w1_t2[1]);
u32 w6_t = swap32 (w1_t2[2]);
u32 w7_t = swap32 (w1_t2[3]);
u32 w8_t = swap32 (w2_t2[0]);
u32 w9_t = swap32 (w2_t2[1]);
u32 wa_t = swap32 (w2_t2[2]);
u32 wb_t = swap32 (w2_t2[3]);
u32 wc_t = swap32 (w3_t2[0]);
u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -438,17 +500,14 @@ static void m01420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
if (MATCHES_NONE_VS (d, search[0])) continue;
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_S
COMPARE_S_SIMD (d, h, c, g);
}
}

View File

@ -142,7 +142,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;
@ -421,7 +421,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;

View File

@ -68,7 +68,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -125,7 +125,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -182,7 +182,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;
@ -377,7 +377,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -446,7 +446,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -503,7 +503,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -61,24 +61,24 @@ static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -92,37 +92,37 @@ static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -192,17 +192,11 @@ static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_M
COMPARE_M_SIMD (d, h, c, g);
}
}
static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -229,37 +223,37 @@ static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t = w0;
u32 w1_t = w[ 1];
u32 w2_t = w[ 2];
u32 w3_t = w[ 3];
u32 w4_t = w[ 4];
u32 w5_t = w[ 5];
u32 w6_t = w[ 6];
u32 w7_t = w[ 7];
u32 w8_t = w[ 8];
u32 w9_t = w[ 9];
u32 wa_t = w[10];
u32 wb_t = w[11];
u32 wc_t = w[12];
u32 wd_t = w[13];
u32 we_t = w[14];
u32 wf_t = w[15];
u32x w0_t = w0;
u32x w1_t = w[ 1];
u32x w2_t = w[ 2];
u32x w3_t = w[ 3];
u32x w4_t = w[ 4];
u32x w5_t = w[ 5];
u32x w6_t = w[ 6];
u32x w7_t = w[ 7];
u32x w8_t = w[ 8];
u32x w9_t = w[ 9];
u32x wa_t = w[10];
u32x wb_t = w[11];
u32x wc_t = w[12];
u32x wd_t = w[13];
u32x we_t = w[14];
u32x wf_t = w[15];
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -325,21 +319,18 @@ static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
if (MATCHES_NONE_VS (d, search[0])) continue;
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_S
COMPARE_S_SIMD (d, h, c, g);
}
}
__kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -377,7 +368,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01430_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -415,7 +406,7 @@ __kernel void m01430_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01430_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -453,7 +444,7 @@ __kernel void m01430_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -491,7 +482,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01430_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -529,7 +520,7 @@ __kernel void m01430_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01430_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01430_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -118,7 +118,7 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t2, w1_t2);
make_unicode (w1, w2_t2, w3_t2);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
switch_buffer_by_offset_le (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
@ -355,7 +355,7 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t2, w1_t2);
make_unicode (w1, w2_t2, w3_t2);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
switch_buffer_by_offset_le (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];

View File

@ -68,7 +68,7 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -125,7 +125,7 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -164,7 +164,7 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t2, w1_t2);
make_unicode (w1, w2_t2, w3_t2);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
switch_buffer_by_offset_le (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
@ -341,7 +341,7 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -410,7 +410,7 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -449,7 +449,7 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t2, w1_t2);
make_unicode (w1, w2_t2, w3_t2);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
switch_buffer_by_offset_le (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
static void m01440m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
@ -65,92 +65,126 @@ static void m01440m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* prepend salt
*/
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
u32 w0_t2[4];
u32 w1_t2[4];
u32 w2_t2[4];
u32 w3_t2[4];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
w0_t2[0] = swap32 (w0[0]);
w0_t2[1] = swap32 (w0[1]);
w0_t2[2] = swap32 (w0[2]);
w0_t2[3] = swap32 (w0[3]);
w1_t2[0] = swap32 (w1[0]);
w1_t2[1] = swap32 (w1[1]);
w1_t2[2] = swap32 (w1[2]);
w1_t2[3] = swap32 (w1[3]);
w2_t2[0] = swap32 (w2[0]);
w2_t2[1] = swap32 (w2[1]);
w2_t2[2] = swap32 (w2[2]);
w2_t2[3] = swap32 (w2[3]);
w3_t2[0] = swap32 (w3[0]);
w3_t2[1] = swap32 (w3[1]);
w3_t2[2] = swap32 (w3[2]);
w3_t2[3] = swap32 (w3[3]);
overwrite_at_be (wx, w0lr, salt_len);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
u32x w0_t = wx[ 0];
u32x w1_t = wx[ 1];
u32x w2_t = wx[ 2];
u32x w3_t = wx[ 3];
u32x w4_t = wx[ 4];
u32x w5_t = wx[ 5];
u32x w6_t = wx[ 6];
u32x w7_t = wx[ 7];
u32x w8_t = wx[ 8];
u32x w9_t = wx[ 9];
u32x wa_t = wx[10];
u32x wb_t = wx[11];
u32x wc_t = wx[12];
u32x wd_t = wx[13];
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
w0_t2[2] |= salt_buf0[2];
w0_t2[3] |= salt_buf0[3];
w1_t2[0] |= salt_buf1[0];
w1_t2[1] |= salt_buf1[1];
w1_t2[2] |= salt_buf1[2];
w1_t2[3] |= salt_buf1[3];
w2_t2[0] |= salt_buf2[0];
w2_t2[1] |= salt_buf2[1];
w2_t2[2] |= salt_buf2[2];
w2_t2[3] |= salt_buf2[3];
w3_t2[0] |= salt_buf3[0];
w3_t2[1] |= salt_buf3[1];
w3_t2[2] |= salt_buf3[2];
w3_t2[3] |= salt_buf3[3];
/**
* sha256
*/
u32 w0_t = swap32 (w0_t2[0]);
u32 w1_t = swap32 (w0_t2[1]);
u32 w2_t = swap32 (w0_t2[2]);
u32 w3_t = swap32 (w0_t2[3]);
u32 w4_t = swap32 (w1_t2[0]);
u32 w5_t = swap32 (w1_t2[1]);
u32 w6_t = swap32 (w1_t2[2]);
u32 w7_t = swap32 (w1_t2[3]);
u32 w8_t = swap32 (w2_t2[0]);
u32 w9_t = swap32 (w2_t2[1]);
u32 wa_t = swap32 (w2_t2[2]);
u32 wb_t = swap32 (w2_t2[3]);
u32 wc_t = swap32 (w3_t2[0]);
u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -220,13 +254,7 @@ static void m01440m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_M
COMPARE_M_SIMD (d, h, c, g);
}
}
@ -287,92 +315,126 @@ static void m01440s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* prepend salt
*/
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
u32 w0_t2[4];
u32 w1_t2[4];
u32 w2_t2[4];
u32 w3_t2[4];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
w0_t2[0] = swap32 (w0[0]);
w0_t2[1] = swap32 (w0[1]);
w0_t2[2] = swap32 (w0[2]);
w0_t2[3] = swap32 (w0[3]);
w1_t2[0] = swap32 (w1[0]);
w1_t2[1] = swap32 (w1[1]);
w1_t2[2] = swap32 (w1[2]);
w1_t2[3] = swap32 (w1[3]);
w2_t2[0] = swap32 (w2[0]);
w2_t2[1] = swap32 (w2[1]);
w2_t2[2] = swap32 (w2[2]);
w2_t2[3] = swap32 (w2[3]);
w3_t2[0] = swap32 (w3[0]);
w3_t2[1] = swap32 (w3[1]);
w3_t2[2] = swap32 (w3[2]);
w3_t2[3] = swap32 (w3[3]);
overwrite_at_be (wx, w0lr, salt_len);
switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len);
u32x w0_t = wx[ 0];
u32x w1_t = wx[ 1];
u32x w2_t = wx[ 2];
u32x w3_t = wx[ 3];
u32x w4_t = wx[ 4];
u32x w5_t = wx[ 5];
u32x w6_t = wx[ 6];
u32x w7_t = wx[ 7];
u32x w8_t = wx[ 8];
u32x w9_t = wx[ 9];
u32x wa_t = wx[10];
u32x wb_t = wx[11];
u32x wc_t = wx[12];
u32x wd_t = wx[13];
u32x we_t = 0;
u32x wf_t = pw_salt_len * 8;
w0_t2[0] |= salt_buf0[0];
w0_t2[1] |= salt_buf0[1];
w0_t2[2] |= salt_buf0[2];
w0_t2[3] |= salt_buf0[3];
w1_t2[0] |= salt_buf1[0];
w1_t2[1] |= salt_buf1[1];
w1_t2[2] |= salt_buf1[2];
w1_t2[3] |= salt_buf1[3];
w2_t2[0] |= salt_buf2[0];
w2_t2[1] |= salt_buf2[1];
w2_t2[2] |= salt_buf2[2];
w2_t2[3] |= salt_buf2[3];
w3_t2[0] |= salt_buf3[0];
w3_t2[1] |= salt_buf3[1];
w3_t2[2] |= salt_buf3[2];
w3_t2[3] |= salt_buf3[3];
/**
* sha256
*/
u32 w0_t = swap32 (w0_t2[0]);
u32 w1_t = swap32 (w0_t2[1]);
u32 w2_t = swap32 (w0_t2[2]);
u32 w3_t = swap32 (w0_t2[3]);
u32 w4_t = swap32 (w1_t2[0]);
u32 w5_t = swap32 (w1_t2[1]);
u32 w6_t = swap32 (w1_t2[2]);
u32 w7_t = swap32 (w1_t2[3]);
u32 w8_t = swap32 (w2_t2[0]);
u32 w9_t = swap32 (w2_t2[1]);
u32 wa_t = swap32 (w2_t2[2]);
u32 wb_t = swap32 (w2_t2[3]);
u32 wc_t = swap32 (w3_t2[0]);
u32 wd_t = swap32 (w3_t2[1]);
u32 we_t = 0;
u32 wf_t = pw_salt_len * 8;
u32 a = SHA256M_A;
u32 b = SHA256M_B;
u32 c = SHA256M_C;
u32 d = SHA256M_D;
u32 e = SHA256M_E;
u32 f = SHA256M_F;
u32 g = SHA256M_G;
u32 h = SHA256M_H;
u32x a = SHA256M_A;
u32x b = SHA256M_B;
u32x c = SHA256M_C;
u32x d = SHA256M_D;
u32x e = SHA256M_E;
u32x f = SHA256M_F;
u32x g = SHA256M_G;
u32x h = SHA256M_H;
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
@ -438,17 +500,14 @@ static void m01440s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a);
wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b);
wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c);
if (MATCHES_NONE_VS (d, search[0])) continue;
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
const u32 r0 = d;
const u32 r1 = h;
const u32 r2 = c;
const u32 r3 = g;
#include COMPARE_S
COMPARE_S_SIMD (d, h, c, g);
}
}

View File

@ -275,7 +275,7 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -338,7 +338,7 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -492,7 +492,7 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -567,7 +567,7 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u32 k_sha256[64] =
{
@ -40,33 +40,33 @@ __constant u32 k_sha256[64] =
SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32 e = digest[4];
u32 f = digest[5];
u32 g = digest[6];
u32 h = digest[7];
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32x e = digest[4];
u32x f = digest[5];
u32x g = digest[6];
u32x h = digest[7];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
#define ROUND_EXPAND() \
{ \
@ -126,7 +126,7 @@ static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4],
digest[7] += h;
}
static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8])
static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -185,7 +185,7 @@ static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipa
sha256_transform (w0, w1, w2, w3, opad);
}
static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8])
static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -262,46 +262,46 @@ static void m01450m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[8];
u32 opad[8];
u32x ipad[8];
u32x opad[8];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -322,16 +322,11 @@ static void m01450m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + salt_len) * 8;
u32 digest[8];
u32x digest[8];
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[7];
const u32 r2 = digest[2];
const u32 r3 = digest[6];
#include COMPARE_M
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
}
}
@ -382,46 +377,46 @@ static void m01450s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[8];
u32 opad[8];
u32x ipad[8];
u32x opad[8];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -442,16 +437,11 @@ static void m01450s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + salt_len) * 8;
u32 digest[8];
u32x digest[8];
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[7];
const u32 r2 = digest[2];
const u32 r3 = digest[6];
#include COMPARE_S
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
}
}

View File

@ -275,7 +275,7 @@ __kernel void m01460_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -373,7 +373,7 @@ __kernel void m01460_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -492,7 +492,7 @@ __kernel void m01460_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -602,7 +602,7 @@ __kernel void m01460_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA256_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u32 k_sha256[64] =
{
@ -40,33 +40,33 @@ __constant u32 k_sha256[64] =
SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f,
};
static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8])
static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32 e = digest[4];
u32 f = digest[5];
u32 g = digest[6];
u32 h = digest[7];
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32x e = digest[4];
u32x f = digest[5];
u32x g = digest[6];
u32x h = digest[7];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = w3[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = w3[3];
#define ROUND_EXPAND() \
{ \
@ -126,7 +126,7 @@ static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4],
digest[7] += h;
}
static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8])
static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8])
{
w0[0] = w0[0] ^ 0x36363636;
w0[1] = w0[1] ^ 0x36363636;
@ -185,7 +185,7 @@ static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipa
sha256_transform (w0, w1, w2, w3, opad);
}
static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8])
static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8])
{
digest[0] = ipad[0];
digest[1] = ipad[1];
@ -258,36 +258,36 @@ static void m01460m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[8];
u32 opad[8];
u32x ipad[8];
u32x opad[8];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -297,13 +297,13 @@ static void m01460m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -320,16 +320,11 @@ static void m01460m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 digest[8];
u32x digest[8];
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[7];
const u32 r2 = digest[2];
const u32 r3 = digest[6];
#include COMPARE_M
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
}
}
@ -364,36 +359,36 @@ static void m01460s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32 ipad[8];
u32 opad[8];
u32x ipad[8];
u32x opad[8];
hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -415,13 +410,13 @@ static void m01460s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -438,16 +433,11 @@ static void m01460s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (64 + pw_len) * 8;
u32 digest[8];
u32x digest[8];
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32 r0 = digest[3];
const u32 r1 = digest[7];
const u32 r2 = digest[2];
const u32 r3 = digest[6];
#include COMPARE_S
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
}
}

View File

@ -518,7 +518,7 @@ __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -601,7 +601,7 @@ __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -709,7 +709,7 @@ __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -804,7 +804,7 @@ __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -191,7 +191,7 @@ __kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -236,7 +236,7 @@ __kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -368,7 +368,7 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -425,7 +425,7 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8])
static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64 w0_t = hl32_to_64 (w0[0], w0[1]);
u64 w1_t = hl32_to_64 (w0[2], w0[3]);
u64 w2_t = hl32_to_64 (w1[0], w1[1]);
u64 w3_t = hl32_to_64 (w1[2], w1[3]);
u64 w4_t = hl32_to_64 (w2[0], w2[1]);
u64 w5_t = hl32_to_64 (w2[2], w2[3]);
u64 w6_t = hl32_to_64 (w3[0], w3[1]);
u64 w7_t = 0;
u64 w8_t = 0;
u64 w9_t = 0;
u64 wa_t = 0;
u64 wb_t = 0;
u64 wc_t = 0;
u64 wd_t = 0;
u64 we_t = 0;
u64 wf_t = hl32_to_64 (w3[2], w3[3]);
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -141,7 +141,7 @@ static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4],
digest[7] = h;
}
static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -156,16 +156,16 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -184,7 +184,7 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -197,17 +197,16 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -234,16 +233,16 @@ static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -262,7 +261,7 @@ static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -275,17 +274,16 @@ static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -323,7 +321,7 @@ __kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -361,7 +359,7 @@ __kernel void m01700_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -399,7 +397,7 @@ __kernel void m01700_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -437,7 +435,7 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -475,7 +473,7 @@ __kernel void m01700_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01700_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -263,7 +263,7 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 out_salt_len = out_len + salt_len;
@ -478,7 +478,7 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
const u32 out_salt_len = out_len + salt_len;

View File

@ -189,7 +189,7 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -252,7 +252,7 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -287,7 +287,7 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;
@ -420,7 +420,7 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -495,7 +495,7 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -530,7 +530,7 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8])
static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64 w0_t = hl32_to_64 (w0[0], w0[1]);
u64 w1_t = hl32_to_64 (w0[2], w0[3]);
u64 w2_t = hl32_to_64 (w1[0], w1[1]);
u64 w3_t = hl32_to_64 (w1[2], w1[3]);
u64 w4_t = hl32_to_64 (w2[0], w2[1]);
u64 w5_t = hl32_to_64 (w2[2], w2[3]);
u64 w6_t = hl32_to_64 (w3[0], w3[1]);
u64 w7_t = 0;
u64 w8_t = 0;
u64 w9_t = 0;
u64 wa_t = 0;
u64 wb_t = 0;
u64 wc_t = 0;
u64 wd_t = 0;
u64 we_t = 0;
u64 wf_t = hl32_to_64 (w3[2], w3[3]);
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -141,7 +141,7 @@ static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4],
digest[7] = h;
}
static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -182,24 +182,24 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -213,16 +213,16 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -241,7 +241,7 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -254,17 +254,16 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -291,16 +290,16 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -319,7 +318,7 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -332,17 +331,16 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -380,7 +378,7 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01710_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -418,7 +416,7 @@ __kernel void m01710_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01710_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -456,7 +454,7 @@ __kernel void m01710_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -494,7 +492,7 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01710_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -532,7 +530,7 @@ __kernel void m01710_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01710_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01710_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -237,7 +237,7 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
@ -413,7 +413,7 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];

View File

@ -189,7 +189,7 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -246,7 +246,7 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -277,7 +277,7 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
@ -395,7 +395,7 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -464,7 +464,7 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -495,7 +495,7 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset (w0, w1, w2, w3, salt_len);
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8])
static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64 w0_t = hl32_to_64 (w0[0], w0[1]);
u64 w1_t = hl32_to_64 (w0[2], w0[3]);
u64 w2_t = hl32_to_64 (w1[0], w1[1]);
u64 w3_t = hl32_to_64 (w1[2], w1[3]);
u64 w4_t = hl32_to_64 (w2[0], w2[1]);
u64 w5_t = hl32_to_64 (w2[2], w2[3]);
u64 w6_t = hl32_to_64 (w3[0], w3[1]);
u64 w7_t = 0;
u64 w8_t = 0;
u64 w9_t = 0;
u64 wa_t = 0;
u64 wb_t = 0;
u64 wc_t = 0;
u64 wd_t = 0;
u64 we_t = 0;
u64 wf_t = hl32_to_64 (w3[2], w3[3]);
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -186,17 +186,6 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
@ -207,24 +196,24 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -240,6 +229,78 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_be (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
@ -247,24 +308,7 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha512
*/
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
//w3_t[2] = swap32 (w3_t[2]);
//w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -277,13 +321,12 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
@ -344,18 +387,6 @@ static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
*/
@ -365,24 +396,24 @@ static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -398,31 +429,86 @@ static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_be (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
/**
* sha512
*/
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
//w3_t[2] = swap32 (w3_t[2]);
//w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -435,13 +521,12 @@ static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

View File

@ -263,7 +263,7 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;
@ -478,7 +478,7 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2));
const u32 out_salt_len = (out_len * 2) + salt_len;

View File

@ -189,7 +189,7 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -246,7 +246,7 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -303,7 +303,7 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;
@ -434,7 +434,7 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -503,7 +503,7 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -560,7 +560,7 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2));
switch_buffer_by_offset_le (s0, s1, s2, s3, (pw_len * 2));
const u32 pw_salt_len = (pw_len * 2) + salt_len;

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8])
static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64 w0_t = hl32_to_64 (w0[0], w0[1]);
u64 w1_t = hl32_to_64 (w0[2], w0[3]);
u64 w2_t = hl32_to_64 (w1[0], w1[1]);
u64 w3_t = hl32_to_64 (w1[2], w1[3]);
u64 w4_t = hl32_to_64 (w2[0], w2[1]);
u64 w5_t = hl32_to_64 (w2[2], w2[3]);
u64 w6_t = hl32_to_64 (w3[0], w3[1]);
u64 w7_t = 0;
u64 w8_t = 0;
u64 w9_t = 0;
u64 wa_t = 0;
u64 wb_t = 0;
u64 wc_t = 0;
u64 wd_t = 0;
u64 we_t = 0;
u64 wf_t = hl32_to_64 (w3[2], w3[3]);
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -141,7 +141,7 @@ static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4],
digest[7] = h;
}
static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -182,24 +182,24 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= swap32 (salt_buf0[0]);
w[ 1] |= swap32 (salt_buf0[1]);
w[ 2] |= swap32 (salt_buf0[2]);
w[ 3] |= swap32 (salt_buf0[3]);
w[ 4] |= swap32 (salt_buf1[0]);
w[ 5] |= swap32 (salt_buf1[1]);
w[ 6] |= swap32 (salt_buf1[2]);
w[ 7] |= swap32 (salt_buf1[3]);
w[ 8] |= swap32 (salt_buf2[0]);
w[ 9] |= swap32 (salt_buf2[1]);
w[10] |= swap32 (salt_buf2[2]);
w[11] |= swap32 (salt_buf2[3]);
w[12] |= swap32 (salt_buf3[0]);
w[13] |= swap32 (salt_buf3[1]);
w[14] |= swap32 (salt_buf3[2]);
w[15] |= swap32 (salt_buf3[3]);
w[ 0] |= swap32_S (salt_buf0[0]);
w[ 1] |= swap32_S (salt_buf0[1]);
w[ 2] |= swap32_S (salt_buf0[2]);
w[ 3] |= swap32_S (salt_buf0[3]);
w[ 4] |= swap32_S (salt_buf1[0]);
w[ 5] |= swap32_S (salt_buf1[1]);
w[ 6] |= swap32_S (salt_buf1[2]);
w[ 7] |= swap32_S (salt_buf1[3]);
w[ 8] |= swap32_S (salt_buf2[0]);
w[ 9] |= swap32_S (salt_buf2[1]);
w[10] |= swap32_S (salt_buf2[2]);
w[11] |= swap32_S (salt_buf2[3]);
w[12] |= swap32_S (salt_buf3[0]);
w[13] |= swap32_S (salt_buf3[1]);
w[14] |= swap32_S (salt_buf3[2]);
w[15] |= swap32_S (salt_buf3[3]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
@ -213,17 +213,16 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -242,7 +241,7 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -255,17 +254,16 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -292,16 +290,16 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 w0_t[4];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0;
w0_t[1] = w[ 1];
@ -320,7 +318,7 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w3_t[2] = w[14];
w3_t[3] = w[15];
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -333,17 +331,16 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -381,7 +378,7 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01730_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -419,7 +416,7 @@ __kernel void m01730_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01730_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -457,7 +454,7 @@ __kernel void m01730_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -495,7 +492,7 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01730_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -533,7 +530,7 @@ __kernel void m01730_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m01730_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m01730_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base

View File

@ -239,7 +239,7 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -412,7 +412,7 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -189,7 +189,7 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -246,7 +246,7 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -285,7 +285,7 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -398,7 +398,7 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -467,7 +467,7 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -506,7 +506,7 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8])
static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8])
{
u64 w0_t = hl32_to_64 (w0[0], w0[1]);
u64 w1_t = hl32_to_64 (w0[2], w0[3]);
u64 w2_t = hl32_to_64 (w1[0], w1[1]);
u64 w3_t = hl32_to_64 (w1[2], w1[3]);
u64 w4_t = hl32_to_64 (w2[0], w2[1]);
u64 w5_t = hl32_to_64 (w2[2], w2[3]);
u64 w6_t = hl32_to_64 (w3[0], w3[1]);
u64 w7_t = 0;
u64 w8_t = 0;
u64 w9_t = 0;
u64 wa_t = 0;
u64 wb_t = 0;
u64 wc_t = 0;
u64 wd_t = 0;
u64 we_t = 0;
u64 wf_t = hl32_to_64 (w3[2], w3[3]);
u64x w0_t = hl32_to_64 (w0[0], w0[1]);
u64x w1_t = hl32_to_64 (w0[2], w0[3]);
u64x w2_t = hl32_to_64 (w1[0], w1[1]);
u64x w3_t = hl32_to_64 (w1[2], w1[3]);
u64x w4_t = hl32_to_64 (w2[0], w2[1]);
u64x w5_t = hl32_to_64 (w2[2], w2[3]);
u64x w6_t = hl32_to_64 (w3[0], w3[1]);
u64x w7_t = 0;
u64x w8_t = 0;
u64x w9_t = 0;
u64x wa_t = 0;
u64x wb_t = 0;
u64x wc_t = 0;
u64x wd_t = 0;
u64x we_t = 0;
u64x wf_t = hl32_to_64 (w3[2], w3[3]);
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -186,17 +186,6 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
@ -207,24 +196,24 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -240,6 +229,78 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_be (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
@ -247,24 +308,7 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* sha512
*/
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
//w3_t[2] = swap32 (w3_t[2]);
//w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -277,13 +321,12 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
@ -344,18 +387,6 @@ static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
const u32 pw_salt_len = pw_len + salt_len;
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
{
const u32 w0r = bfs_buf[il_pos].i;
w0[0] = w0l | w0r;
/**
* prepend salt
*/
@ -365,24 +396,24 @@ static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = swap32 (w0[0]);
w0_t[1] = swap32 (w0[1]);
w0_t[2] = swap32 (w0[2]);
w0_t[3] = swap32 (w0[3]);
w1_t[0] = swap32 (w1[0]);
w1_t[1] = swap32 (w1[1]);
w1_t[2] = swap32 (w1[2]);
w1_t[3] = swap32 (w1[3]);
w2_t[0] = swap32 (w2[0]);
w2_t[1] = swap32 (w2[1]);
w2_t[2] = swap32 (w2[2]);
w2_t[3] = swap32 (w2[3]);
w3_t[0] = swap32 (w3[0]);
w3_t[1] = swap32 (w3[1]);
w3_t[2] = swap32 (w3[2]);
w3_t[3] = swap32 (w3[3]);
w0_t[0] = swap32_S (w0[0]);
w0_t[1] = swap32_S (w0[1]);
w0_t[2] = swap32_S (w0[2]);
w0_t[3] = swap32_S (w0[3]);
w1_t[0] = swap32_S (w1[0]);
w1_t[1] = swap32_S (w1[1]);
w1_t[2] = swap32_S (w1[2]);
w1_t[3] = swap32_S (w1[3]);
w2_t[0] = swap32_S (w2[0]);
w2_t[1] = swap32_S (w2[1]);
w2_t[2] = swap32_S (w2[2]);
w2_t[3] = swap32_S (w2[3]);
w3_t[0] = swap32_S (w3[0]);
w3_t[1] = swap32_S (w3[1]);
w3_t[2] = swap32_S (w3[2]);
w3_t[3] = swap32_S (w3[3]);
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -398,31 +429,86 @@ static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
w0_t[0] = swap32_S (w0_t[0]);
w0_t[1] = swap32_S (w0_t[1]);
w0_t[2] = swap32_S (w0_t[2]);
w0_t[3] = swap32_S (w0_t[3]);
w1_t[0] = swap32_S (w1_t[0]);
w1_t[1] = swap32_S (w1_t[1]);
w1_t[2] = swap32_S (w1_t[2]);
w1_t[3] = swap32_S (w1_t[3]);
w2_t[0] = swap32_S (w2_t[0]);
w2_t[1] = swap32_S (w2_t[1]);
w2_t[2] = swap32_S (w2_t[2]);
w2_t[3] = swap32_S (w2_t[3]);
w3_t[0] = swap32_S (w3_t[0]);
w3_t[1] = swap32_S (w3_t[1]);
w3_t[2] = swap32_S (w3_t[2]);
w3_t[3] = swap32_S (w3_t[3]);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* prepend salt
*/
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_be (wx, w0lr, salt_len);
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = wx[ 0];
w0_t[1] = wx[ 1];
w0_t[2] = wx[ 2];
w0_t[3] = wx[ 3];
w1_t[0] = wx[ 4];
w1_t[1] = wx[ 5];
w1_t[2] = wx[ 6];
w1_t[3] = wx[ 7];
w2_t[0] = wx[ 8];
w2_t[1] = wx[ 9];
w2_t[2] = wx[10];
w2_t[3] = wx[11];
w3_t[0] = wx[12];
w3_t[1] = wx[13];
w3_t[2] = 0;
w3_t[3] = pw_salt_len * 8;
/**
* sha512
*/
w0_t[0] = swap32 (w0_t[0]);
w0_t[1] = swap32 (w0_t[1]);
w0_t[2] = swap32 (w0_t[2]);
w0_t[3] = swap32 (w0_t[3]);
w1_t[0] = swap32 (w1_t[0]);
w1_t[1] = swap32 (w1_t[1]);
w1_t[2] = swap32 (w1_t[2]);
w1_t[3] = swap32 (w1_t[3]);
w2_t[0] = swap32 (w2_t[0]);
w2_t[1] = swap32 (w2_t[1]);
w2_t[2] = swap32 (w2_t[2]);
w2_t[3] = swap32 (w2_t[3]);
w3_t[0] = swap32 (w3_t[0]);
w3_t[1] = swap32 (w3_t[1]);
//w3_t[2] = swap32 (w3_t[2]);
//w3_t[3] = swap32 (w3_t[3]);
u64 digest[8];
u64x digest[8];
digest[0] = SHA512M_A;
digest[1] = SHA512M_B;
@ -435,13 +521,12 @@ static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

View File

@ -306,7 +306,7 @@ __kernel void m01750_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -369,7 +369,7 @@ __kernel void m01750_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -524,7 +524,7 @@ __kernel void m01750_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -599,7 +599,7 @@ __kernel void m01750_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8])
{
u64 w0_t = w0[0];
u64 w1_t = w0[1];
u64 w2_t = w0[2];
u64 w3_t = w0[3];
u64 w4_t = w1[0];
u64 w5_t = w1[1];
u64 w6_t = w1[2];
u64 w7_t = w1[3];
u64 w8_t = w2[0];
u64 w9_t = w2[1];
u64 wa_t = w2[2];
u64 wb_t = w2[3];
u64 wc_t = w3[0];
u64 wd_t = w3[1];
u64 we_t = w3[2];
u64 wf_t = w3[3];
u64x w0_t = w0[0];
u64x w1_t = w0[1];
u64x w2_t = w0[2];
u64x w3_t = w0[3];
u64x w4_t = w1[0];
u64x w5_t = w1[1];
u64x w6_t = w1[2];
u64x w7_t = w1[3];
u64x w8_t = w2[0];
u64x w9_t = w2[1];
u64x wa_t = w2[2];
u64x wb_t = w2[3];
u64x wc_t = w3[0];
u64x wd_t = w3[1];
u64x we_t = w3[2];
u64x wf_t = w3[3];
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -130,12 +130,12 @@ static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4],
digest[7] += h;
}
static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8])
static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8])
{
u64 w0_t[4];
u64 w1_t[4];
u64 w2_t[4];
u64 w3_t[4];
u64x w0_t[4];
u64x w1_t[4];
u64x w2_t[4];
u64x w3_t[4];
w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636;
w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636;
@ -194,12 +194,12 @@ static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipa
sha512_transform (w0_t, w1_t, w2_t, w3_t, opad);
}
static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8])
static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8])
{
u64 w0_t[4];
u64 w1_t[4];
u64 w2_t[4];
u64 w3_t[4];
u64x w0_t[4];
u64x w1_t[4];
u64x w2_t[4];
u64x w3_t[4];
w0_t[0] = hl32_to_64 (w0[0], w0[1]);
w0_t[1] = hl32_to_64 (w0[2], w0[3]);
@ -293,46 +293,46 @@ static void m01750m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u64 ipad[8];
u64 opad[8];
u64x ipad[8];
u64x opad[8];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -353,17 +353,16 @@ static void m01750m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (128 + salt_len) * 8;
u64 digest[8];
u64x digest[8];
hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
@ -414,46 +413,46 @@ static void m01750s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
/**
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = 0;
w3_t[3] = 0;
u64 ipad[8];
u64 opad[8];
u64x ipad[8];
u64x opad[8];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -474,17 +473,16 @@ static void m01750s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (128 + salt_len) * 8;
u64 digest[8];
u64x digest[8];
hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

View File

@ -306,7 +306,7 @@ __kernel void m01760_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -404,7 +404,7 @@ __kernel void m01760_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -524,7 +524,7 @@ __kernel void m01760_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -634,7 +634,7 @@ __kernel void m01760_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _SHA512_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,9 +18,7 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#include "OpenCL/simd.c"
__constant u64 k_sha512[80] =
{
@ -44,33 +44,33 @@ __constant u64 k_sha512[80] =
SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
};
static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8])
static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8])
{
u64 w0_t = w0[0];
u64 w1_t = w0[1];
u64 w2_t = w0[2];
u64 w3_t = w0[3];
u64 w4_t = w1[0];
u64 w5_t = w1[1];
u64 w6_t = w1[2];
u64 w7_t = w1[3];
u64 w8_t = w2[0];
u64 w9_t = w2[1];
u64 wa_t = w2[2];
u64 wb_t = w2[3];
u64 wc_t = w3[0];
u64 wd_t = w3[1];
u64 we_t = w3[2];
u64 wf_t = w3[3];
u64x w0_t = w0[0];
u64x w1_t = w0[1];
u64x w2_t = w0[2];
u64x w3_t = w0[3];
u64x w4_t = w1[0];
u64x w5_t = w1[1];
u64x w6_t = w1[2];
u64x w7_t = w1[3];
u64x w8_t = w2[0];
u64x w9_t = w2[1];
u64x wa_t = w2[2];
u64x wb_t = w2[3];
u64x wc_t = w3[0];
u64x wd_t = w3[1];
u64x we_t = w3[2];
u64x wf_t = w3[3];
u64 a = digest[0];
u64 b = digest[1];
u64 c = digest[2];
u64 d = digest[3];
u64 e = digest[4];
u64 f = digest[5];
u64 g = digest[6];
u64 h = digest[7];
u64x a = digest[0];
u64x b = digest[1];
u64x c = digest[2];
u64x d = digest[3];
u64x e = digest[4];
u64x f = digest[5];
u64x g = digest[6];
u64x h = digest[7];
#define ROUND_EXPAND() \
{ \
@ -130,12 +130,12 @@ static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4],
digest[7] += h;
}
static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8])
static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8])
{
u64 w0_t[4];
u64 w1_t[4];
u64 w2_t[4];
u64 w3_t[4];
u64x w0_t[4];
u64x w1_t[4];
u64x w2_t[4];
u64x w3_t[4];
w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636;
w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636;
@ -145,14 +145,14 @@ static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipa
w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636;
w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636;
w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636;
w2_t[0] = 0 ^ 0x3636363636363636;
w2_t[1] = 0 ^ 0x3636363636363636;
w2_t[2] = 0 ^ 0x3636363636363636;
w2_t[3] = 0 ^ 0x3636363636363636;
w3_t[0] = 0 ^ 0x3636363636363636;
w3_t[1] = 0 ^ 0x3636363636363636;
w3_t[2] = 0 ^ 0x3636363636363636;
w3_t[3] = 0 ^ 0x3636363636363636;
w2_t[0] = 0x3636363636363636;
w2_t[1] = 0x3636363636363636;
w2_t[2] = 0x3636363636363636;
w2_t[3] = 0x3636363636363636;
w3_t[0] = 0x3636363636363636;
w3_t[1] = 0x3636363636363636;
w3_t[2] = 0x3636363636363636;
w3_t[3] = 0x3636363636363636;
ipad[0] = SHA512M_A;
ipad[1] = SHA512M_B;
@ -173,14 +173,14 @@ static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipa
w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c;
w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c;
w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c;
w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c;
w2_t[0] = 0x5c5c5c5c5c5c5c5c;
w2_t[1] = 0x5c5c5c5c5c5c5c5c;
w2_t[2] = 0x5c5c5c5c5c5c5c5c;
w2_t[3] = 0x5c5c5c5c5c5c5c5c;
w3_t[0] = 0x5c5c5c5c5c5c5c5c;
w3_t[1] = 0x5c5c5c5c5c5c5c5c;
w3_t[2] = 0x5c5c5c5c5c5c5c5c;
w3_t[3] = 0x5c5c5c5c5c5c5c5c;
opad[0] = SHA512M_A;
opad[1] = SHA512M_B;
@ -194,12 +194,12 @@ static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipa
sha512_transform (w0_t, w1_t, w2_t, w3_t, opad);
}
static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8])
static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8])
{
u64 w0_t[4];
u64 w1_t[4];
u64 w2_t[4];
u64 w3_t[4];
u64x w0_t[4];
u64x w1_t[4];
u64x w2_t[4];
u64x w3_t[4];
w0_t[0] = hl32_to_64 (w0[0], w0[1]);
w0_t[1] = hl32_to_64 (w0[2], w0[3]);
@ -289,36 +289,36 @@ static void m01760m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u64 ipad[8];
u64 opad[8];
u64x ipad[8];
u64x opad[8];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -328,13 +328,13 @@ static void m01760m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -351,17 +351,16 @@ static void m01760m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (128 + pw_len) * 8;
u64 digest[8];
u64x digest[8];
hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_M
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
@ -396,36 +395,36 @@ static void m01760s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* pads
*/
u32 w0_t[4];
u32x w0_t[4];
w0_t[0] = swap32 (salt_buf0[0]);
w0_t[1] = swap32 (salt_buf0[1]);
w0_t[2] = swap32 (salt_buf0[2]);
w0_t[3] = swap32 (salt_buf0[3]);
u32 w1_t[4];
u32x w1_t[4];
w1_t[0] = swap32 (salt_buf1[0]);
w1_t[1] = swap32 (salt_buf1[1]);
w1_t[2] = swap32 (salt_buf1[2]);
w1_t[3] = swap32 (salt_buf1[3]);
u32 w2_t[4];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32 w3_t[4];
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u64 ipad[8];
u64 opad[8];
u64x ipad[8];
u64x opad[8];
hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad);
@ -447,13 +446,13 @@ static void m01760s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
w0_t[0] = w0[0];
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
@ -470,17 +469,16 @@ static void m01760s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w3_t[2] = 0;
w3_t[3] = (128 + pw_len) * 8;
u64 digest[8];
u64x digest[8];
hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
const u32x r0 = l32_from_64 (digest[7]);
const u32x r1 = h32_from_64 (digest[7]);
const u32x r2 = l32_from_64 (digest[3]);
const u32x r3 = h32_from_64 (digest[3]);
const u32 r0 = l32_from_64 (digest[7]);
const u32 r1 = h32_from_64 (digest[7]);
const u32 r2 = l32_from_64 (digest[3]);
const u32 r3 = h32_from_64 (digest[3]);
#include COMPARE_S
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}

View File

@ -68,7 +68,7 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -111,7 +111,7 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -292,7 +292,7 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -347,7 +347,7 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -114,18 +114,18 @@ static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 tmp2;
u32x tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
@ -200,16 +200,11 @@ static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c &= 0x00ffffff;
b &= 0x00ffffff;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -315,18 +310,18 @@ static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 tmp2;
u32x tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
@ -393,9 +388,7 @@ static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
bool q_cond = allx ((a & 0x00ffffff) != search[0]);
if (q_cond) continue;
if (MATCHES_NONE_VS ((a & 0x00ffffff), search[0])) continue;
MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
@ -406,16 +399,11 @@ static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c &= 0x00ffffff;
b &= 0x00ffffff;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}
__kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -453,15 +441,15 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
/*
 * m02400_m08 -- kernel entry point with an empty body: in the code shown here
 * it reads and writes nothing through any of its parameters.
 *
 * NOTE(review): the declaration appears twice below; the two lines differ only
 * in the pointee type of words_buf_r (__constant u32 * vs __constant u32x *).
 * Presumably these are the before/after forms of the SIMD conversion -- confirm
 * against the real kernel file before relying on either one.
 * NOTE(review): the sibling m02400_m04 forwards to m02400m; why this variant
 * is a no-op is not visible from here.
 */
__kernel void m02400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
/*
 * m02400_m16 -- kernel entry point with an empty body: in the code shown here
 * it reads and writes nothing through any of its parameters.
 *
 * NOTE(review): the declaration appears twice below; the two lines differ only
 * in the pointee type of words_buf_r (__constant u32 * vs __constant u32x *).
 * Presumably these are the before/after forms of the SIMD conversion -- confirm
 * against the real kernel file before relying on either one.
 * NOTE(review): the sibling m02400_m04 forwards to m02400m; why this variant
 * is a no-op is not visible from here.
 */
__kernel void m02400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
__kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -499,10 +487,10 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
/*
 * m02400_s08 -- kernel entry point with an empty body: in the code shown here
 * it reads and writes nothing through any of its parameters.
 *
 * NOTE(review): the declaration appears twice below; the two lines differ only
 * in the pointee type of words_buf_r (__constant u32 * vs __constant u32x *).
 * Presumably these are the before/after forms of the SIMD conversion -- confirm
 * against the real kernel file before relying on either one.
 * NOTE(review): the sibling m02400_s04 forwards to m02400s; why this variant
 * is a no-op is not visible from here.
 */
__kernel void m02400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
/*
 * m02400_s16 -- kernel entry point with an empty body: in the code shown here
 * it reads and writes nothing through any of its parameters.
 *
 * NOTE(review): the declaration appears twice below; the two lines differ only
 * in the pointee type of words_buf_r (__constant u32 * vs __constant u32x *).
 * Presumably these are the before/after forms of the SIMD conversion -- confirm
 * against the real kernel file before relying on either one.
 * NOTE(review): the sibling m02400_s04 forwards to m02400s; why this variant
 * is a no-op is not visible from here.
 */
__kernel void m02400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02400_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}

View File

@ -128,7 +128,7 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
w0[0] |= s0[0];
w0[1] |= s0[1];
@ -357,7 +357,7 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, out_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
w0[0] |= s0[0];
w0[1] |= s0[1];

View File

@ -68,7 +68,7 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -124,7 +124,7 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -159,7 +159,7 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;
@ -341,7 +341,7 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -409,7 +409,7 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
/**
@ -444,7 +444,7 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s3[2] = 0;
s3[3] = 0;
switch_buffer_by_offset (s0, s1, s2, s3, pw_len);
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
const u32 pw_salt_len = pw_len + salt_len;

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,9 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -61,7 +61,7 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[0] |= salt_buf0[0];
w[1] |= salt_buf0[1];
@ -72,7 +72,7 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
const u32 pw_salt_len = pw_len + salt_len;
truncate_block (w, pw_salt_len);
truncate_block_S (w, pw_salt_len);
/**
* algorithm specific
@ -159,18 +159,18 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 tmp2;
u32x tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
@ -245,16 +245,11 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c &= 0x00ffffff;
b &= 0x00ffffff;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -295,7 +290,7 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
salt_buf3[2] = 0;
salt_buf3[3] = 0;
switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[0] |= salt_buf0[0];
w[1] |= salt_buf0[1];
@ -306,7 +301,7 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
const u32 pw_salt_len = pw_len + salt_len;
truncate_block (w, pw_salt_len);
truncate_block_S (w, pw_salt_len);
/**
* algorithm specific
@ -405,18 +400,18 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
u32 w0l = w[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = words_buf_r[il_pos];
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32 w0 = w0l | w0r;
const u32x w0 = w0l | w0r;
u32 tmp2;
u32x tmp2;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00);
MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01);
@ -483,9 +478,7 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33);
MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30);
bool q_cond = allx ((a & 0x00ffffff) != search[0]);
if (q_cond) continue;
if (MATCHES_NONE_VS ((a & 0x00ffffff), search[0])) continue;
MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31);
MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32);
@ -496,16 +489,11 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
c &= 0x00ffffff;
b &= 0x00ffffff;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}
__kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -543,15 +531,15 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m02410_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m02410_m08: kernel entry point with an empty body; launching it performs no work.
 * NOTE(review): presumably kept so that this kernel name exists for the mode — confirm against the host code.
 */
__kernel void m02410_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
__kernel void m02410_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m02410_m16: kernel entry point with an empty body; launching it performs no work.
 * NOTE(review): presumably kept so that this kernel name exists for the mode — confirm against the host code.
 */
__kernel void m02410_m16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
__kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
__kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
@ -589,10 +577,10 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset);
}
__kernel void m02410_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m02410_s08: kernel entry point with an empty body; launching it performs no work.
 * NOTE(review): presumably kept so that this kernel name exists for the mode — confirm against the host code.
 */
__kernel void m02410_s08 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
__kernel void m02410_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
/**
 * m02410_s16: kernel entry point with an empty body; launching it performs no work.
 * NOTE(review): presumably kept so that this kernel name exists for the mode — confirm against the host code.
 */
__kernel void m02410_s16 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __constant u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}

View File

@ -70,7 +70,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -149,7 +149,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -422,7 +422,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -513,7 +513,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,17 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
/**
 * uint_to_hex_lower8 (i): table lookup into l_bin2asc, with one definition per VECT_SIZE.
 *
 * VECT_SIZE 1      : the index is used directly and the entry is cast to u32x.
 * VECT_SIZE 2, 4, 8: every lane of the index (.s0 .. .s7) is looked up on its own
 *                    and the entries are packed, in lane order, into a u32x.
 *
 * The macro expands to a reference to l_bin2asc, so an identifier with that name
 * has to be in scope wherever the macro is used.
 * In the vector variants the argument is expanded once per lane, so it should be
 * an expression without side effects.
 * NOTE(review): no definition is provided for any other VECT_SIZE value — confirm
 * that only 1, 2, 4 and 8 are ever configured.
 */
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#endif
static void m02610m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
{
@ -54,195 +62,219 @@ static void m02610m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
const u32 w8_t = s[0];
const u32 w9_t = s[1];
const u32 wa_t = s[2];
const u32 wb_t = s[3];
const u32 wc_t = s[4];
const u32 wd_t = s[5];
const u32 we_t = s[6];
const u32 wf_t = s[7];
w2_t[0] = s[0];
w2_t[1] = s[1];
w2_t[2] = s[2];
w2_t[3] = s[3];
w3_t[0] = s[4];
w3_t[1] = s[5];
w3_t[2] = s[6];
w3_t[3] = s[7];
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
@ -290,195 +322,219 @@ static void m02610s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
const u32 w8_t = s[0];
const u32 w9_t = s[1];
const u32 wa_t = s[2];
const u32 wb_t = s[3];
const u32 wc_t = s[4];
const u32 wd_t = s[5];
const u32 we_t = s[6];
const u32 wf_t = s[7];
w2_t[0] = s[0];
w2_t[1] = s[1];
w2_t[2] = s[2];
w2_t[3] = s[3];
w3_t[0] = s[4];
w3_t[1] = s[5];
w3_t[2] = s[6];
w3_t[3] = s[7];
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}

View File

@ -70,7 +70,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -151,7 +151,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -507,7 +507,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -600,7 +600,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,17 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
// uint_to_hex_lower8: table lookup through l_bin2asc[], used by the kernels
// in this file to build the hex-encoded message words for the second MD5 pass.
// NOTE(review): this span appears to show both sides of a diff hunk -- the
// plain #define directly below looks like the pre-change scalar form and the
// #if chain after it looks like its SIMD replacement; confirm against the
// unrendered patch before treating both as live code.
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
// One variant per VECT_SIZE of 1, 2, 4 or 8. Each variant does one l_bin2asc[]
// lookup per vector component of (i) (.s0 .. .s7) and packs the results into a
// u32x. The argument (i) is expanded once per component, so it should be free
// of side effects. The chain has no #else branch, so only these four widths
// are covered.
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#endif
static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
{
@ -56,193 +64,222 @@ static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
const u32 w8_t = s[0];
const u32 w9_t = s[1];
const u32 wa_t = s[2];
const u32 wb_t = s[3];
const u32 wc_t = s[4];
const u32 wd_t = s[5];
const u32 we_t = s[6];
const u32 wf_t = s[7];
w2_t[0] = s[0];
w2_t[1] = s[1];
w2_t[2] = s[2];
w2_t[3] = s[3];
w3_t[0] = s[4];
w3_t[1] = s[5];
w3_t[2] = s[6];
w3_t[3] = s[7];
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r_a = a + MD5M_A;
const u32 r_b = b + MD5M_B;
const u32 r_c = c + MD5M_C;
const u32 r_d = d + MD5M_D;
const u32x r_a = a + MD5M_A;
const u32x r_b = b + MD5M_B;
const u32x r_c = c + MD5M_C;
const u32x r_d = d + MD5M_D;
a = r_a;
b = r_b;
@ -322,12 +359,7 @@ static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c += r_c;
d += r_d;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
@ -377,193 +409,222 @@ static void m02710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
const u32 w8_t = s[0];
const u32 w9_t = s[1];
const u32 wa_t = s[2];
const u32 wb_t = s[3];
const u32 wc_t = s[4];
const u32 wd_t = s[5];
const u32 we_t = s[6];
const u32 wf_t = s[7];
w2_t[0] = s[0];
w2_t[1] = s[1];
w2_t[2] = s[2];
w2_t[3] = s[3];
w3_t[0] = s[4];
w3_t[1] = s[5];
w3_t[2] = s[6];
w3_t[3] = s[7];
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r_a = a + MD5M_A;
const u32 r_b = b + MD5M_B;
const u32 r_c = c + MD5M_C;
const u32 r_d = d + MD5M_D;
const u32x r_a = a + MD5M_A;
const u32x r_b = b + MD5M_B;
const u32x r_c = c + MD5M_C;
const u32x r_d = d + MD5M_D;
a = r_a;
b = r_b;
@ -635,7 +696,7 @@ static void m02710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33);
MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30);
if (allx ((a + r_a) != search[0])) continue;
if (MATCHES_NONE_VS ((a + r_a), search[0])) continue;
MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31);
MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32);
@ -646,12 +707,7 @@ static void m02710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c += r_c;
d += r_d;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}

View File

@ -70,7 +70,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -150,7 +150,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -506,7 +506,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -598,7 +598,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -5,6 +5,8 @@
#define _MD5_
#define NEW_SIMD_CODE
#include "include/constants.h"
#include "include/kernel_vendor.h"
@ -16,11 +18,17 @@
#include "include/kernel_functions.c"
#include "OpenCL/types_ocl.c"
#include "OpenCL/common.c"
#include "OpenCL/simd.c"
#define COMPARE_S "OpenCL/check_single_comp4.c"
#define COMPARE_M "OpenCL/check_multi_comp4.c"
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#endif
static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256])
{
@ -55,114 +63,143 @@ static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = s[0];
const u32 w1_t = s[1];
const u32 w2_t = s[2];
const u32 w3_t = s[3];
const u32 w4_t = s[4];
const u32 w5_t = s[5];
const u32 w6_t = s[6];
const u32 w7_t = s[7];
w0_t[0] = s[0];
w0_t[1] = s[1];
w0_t[2] = s[2];
w0_t[3] = s[3];
const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w1_t[0] = s[4];
w1_t[1] = s[5];
w1_t[2] = s[6];
w1_t[3] = s[7];
w2_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w2_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w2_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w2_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w3_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w3_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w3_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w3_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
a = MD5M_A;
@ -170,78 +207,78 @@ static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r_a = a + MD5M_A;
const u32 r_b = b + MD5M_B;
const u32 r_c = c + MD5M_C;
const u32 r_d = d + MD5M_D;
const u32x r_a = a + MD5M_A;
const u32x r_b = b + MD5M_B;
const u32x r_c = c + MD5M_C;
const u32x r_d = d + MD5M_D;
a = r_a;
b = r_b;
@ -321,12 +358,7 @@ static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c += r_c;
d += r_d;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_M
COMPARE_M_SIMD (a, d, c, b);
}
}
@ -375,114 +407,143 @@ static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++)
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{
const u32 w0r = bfs_buf[il_pos].i;
const u32x w0r = w0r_create_bft (bfs_buf, il_pos);
w0[0] = w0l | w0r;
const u32x w0lr = w0l | w0r;
u32 a = MD5M_A;
u32 b = MD5M_B;
u32 c = MD5M_C;
u32 d = MD5M_D;
u32x w0_t[4];
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
w0_t[0] = w0lr;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
u32x w1_t[4];
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
u32x w2_t[4];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
a += MD5M_A;
b += MD5M_B;
c += MD5M_C;
d += MD5M_D;
const u32 w0_t = s[0];
const u32 w1_t = s[1];
const u32 w2_t = s[2];
const u32 w3_t = s[3];
const u32 w4_t = s[4];
const u32 w5_t = s[5];
const u32 w6_t = s[6];
const u32 w7_t = s[7];
w0_t[0] = s[0];
w0_t[1] = s[1];
w0_t[2] = s[2];
w0_t[3] = s[3];
const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0
w1_t[0] = s[4];
w1_t[1] = s[5];
w1_t[2] = s[6];
w1_t[3] = s[7];
w2_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0
w2_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0
| uint_to_hex_lower8 ((a >> 24) & 255) << 16;
const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0
w2_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0
| uint_to_hex_lower8 ((b >> 8) & 255) << 16;
const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0
w2_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0
| uint_to_hex_lower8 ((b >> 24) & 255) << 16;
const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0
w3_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0
| uint_to_hex_lower8 ((c >> 8) & 255) << 16;
const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0
w3_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0
| uint_to_hex_lower8 ((c >> 24) & 255) << 16;
const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0
w3_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0
| uint_to_hex_lower8 ((d >> 8) & 255) << 16;
const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0
w3_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0
| uint_to_hex_lower8 ((d >> 24) & 255) << 16;
a = MD5M_A;
@ -490,78 +551,78 @@ static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c = MD5M_C;
d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
const u32 r_a = a + MD5M_A;
const u32 r_b = b + MD5M_B;
const u32 r_c = c + MD5M_C;
const u32 r_d = d + MD5M_D;
const u32x r_a = a + MD5M_A;
const u32x r_b = b + MD5M_B;
const u32x r_c = c + MD5M_C;
const u32x r_d = d + MD5M_D;
a = r_a;
b = r_b;
@ -633,7 +694,7 @@ static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33);
MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30);
if (allx ((a + r_a) != search[0])) continue;
if (MATCHES_NONE_VS ((a + r_a), search[0])) continue;
MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31);
MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32);
@ -644,12 +705,7 @@ static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
c += r_c;
d += r_d;
const u32 r0 = a;
const u32 r1 = d;
const u32 r2 = c;
const u32 r3 = b;
#include COMPARE_S
COMPARE_S_SIMD (a, d, c, b);
}
}

View File

@ -515,7 +515,7 @@ __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -592,7 +592,7 @@ __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];
@ -707,7 +707,7 @@ __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
{
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
}
/**
@ -796,7 +796,7 @@ __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len);
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
u32 w0[4];

View File

@ -632,7 +632,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
@ -934,7 +934,7 @@ __kernel void m03100_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len);
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];

Some files were not shown because too many files have changed in this diff Show More