Add SHA3 and Keccak

The previous hash-mode 5000 covered Keccak-256 only. FIPS changed one
padding byte while adopting Keccak as the SHA3 standard, which gives us
different digests. Now we have separate kernels for SHA3 and Keccak.

 - Added hash-mode 17300 = SHA3-224
 - Added hash-mode 17400 = SHA3-256
 - Added hash-mode 17500 = SHA3-384
 - Added hash-mode 17600 = SHA3-512
 - Added hash-mode 17700 = Keccak-224
 - Added hash-mode 17800 = Keccak-256
 - Added hash-mode 17900 = Keccak-384
 - Added hash-mode 18000 = Keccak-512
 - Removed hash-mode 5000 = SHA-3 (Keccak)
pull/1717/head
R. Yushaev 6 years ago
parent 9d7e39590b
commit 5c87720acc

@ -30952,6 +30952,18 @@ DECLSPEC void append_0x01_2x4_S (u32 *w0, u32 *w1, const u32 offset)
append_helper_1x4_S (w1, ((offset16 == 1) ? 0x01010101 : 0), v);
}
DECLSPEC void append_0x06_2x4_S (u32 *w0, u32 *w1, const u32 offset)
{
u32 v[4];
set_mark_1x4_S (v, offset);
const u32 offset16 = offset / 16;
append_helper_1x4_S (w0, ((offset16 == 0) ? 0x06060606 : 0), v);
append_helper_1x4_S (w1, ((offset16 == 1) ? 0x06060606 : 0), v);
}
DECLSPEC void append_0x80_1x4_S (u32 *w0, const u32 offset)
{
u32 v[4];
@ -60122,6 +60134,64 @@ DECLSPEC void append_0x01_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
#endif
}
DECLSPEC void append_0x06_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
{
#if VECT_SIZE == 1
append_0x06_2x4_S (w0, w1, offset);
#else
u32 t0[4];
u32 t1[4];
#endif
#if VECT_SIZE == 2
PACKVS24 (t0, t1, w0, w1, 0); append_0x06_2x4_S (t0, t1, offset.s0); PACKSV24 (t0, t1, w0, w1, 0);
PACKVS24 (t0, t1, w0, w1, 1); append_0x06_2x4_S (t0, t1, offset.s1); PACKSV24 (t0, t1, w0, w1, 1);
#elif VECT_SIZE == 4
PACKVS24 (t0, t1, w0, w1, 0); append_0x06_2x4_S (t0, t1, offset.s0); PACKSV24 (t0, t1, w0, w1, 0);
PACKVS24 (t0, t1, w0, w1, 1); append_0x06_2x4_S (t0, t1, offset.s1); PACKSV24 (t0, t1, w0, w1, 1);
PACKVS24 (t0, t1, w0, w1, 2); append_0x06_2x4_S (t0, t1, offset.s2); PACKSV24 (t0, t1, w0, w1, 2);
PACKVS24 (t0, t1, w0, w1, 3); append_0x06_2x4_S (t0, t1, offset.s3); PACKSV24 (t0, t1, w0, w1, 3);
#elif VECT_SIZE == 8
PACKVS24 (t0, t1, w0, w1, 0); append_0x06_2x4_S (t0, t1, offset.s0); PACKSV24 (t0, t1, w0, w1, 0);
PACKVS24 (t0, t1, w0, w1, 1); append_0x06_2x4_S (t0, t1, offset.s1); PACKSV24 (t0, t1, w0, w1, 1);
PACKVS24 (t0, t1, w0, w1, 2); append_0x06_2x4_S (t0, t1, offset.s2); PACKSV24 (t0, t1, w0, w1, 2);
PACKVS24 (t0, t1, w0, w1, 3); append_0x06_2x4_S (t0, t1, offset.s3); PACKSV24 (t0, t1, w0, w1, 3);
PACKVS24 (t0, t1, w0, w1, 4); append_0x06_2x4_S (t0, t1, offset.s4); PACKSV24 (t0, t1, w0, w1, 4);
PACKVS24 (t0, t1, w0, w1, 5); append_0x06_2x4_S (t0, t1, offset.s5); PACKSV24 (t0, t1, w0, w1, 5);
PACKVS24 (t0, t1, w0, w1, 6); append_0x06_2x4_S (t0, t1, offset.s6); PACKSV24 (t0, t1, w0, w1, 6);
PACKVS24 (t0, t1, w0, w1, 7); append_0x06_2x4_S (t0, t1, offset.s7); PACKSV24 (t0, t1, w0, w1, 7);
#elif VECT_SIZE == 16
PACKVS24 (t0, t1, w0, w1, 0); append_0x06_2x4_S (t0, t1, offset.s0); PACKSV24 (t0, t1, w0, w1, 0);
PACKVS24 (t0, t1, w0, w1, 1); append_0x06_2x4_S (t0, t1, offset.s1); PACKSV24 (t0, t1, w0, w1, 1);
PACKVS24 (t0, t1, w0, w1, 2); append_0x06_2x4_S (t0, t1, offset.s2); PACKSV24 (t0, t1, w0, w1, 2);
PACKVS24 (t0, t1, w0, w1, 3); append_0x06_2x4_S (t0, t1, offset.s3); PACKSV24 (t0, t1, w0, w1, 3);
PACKVS24 (t0, t1, w0, w1, 4); append_0x06_2x4_S (t0, t1, offset.s4); PACKSV24 (t0, t1, w0, w1, 4);
PACKVS24 (t0, t1, w0, w1, 5); append_0x06_2x4_S (t0, t1, offset.s5); PACKSV24 (t0, t1, w0, w1, 5);
PACKVS24 (t0, t1, w0, w1, 6); append_0x06_2x4_S (t0, t1, offset.s6); PACKSV24 (t0, t1, w0, w1, 6);
PACKVS24 (t0, t1, w0, w1, 7); append_0x06_2x4_S (t0, t1, offset.s7); PACKSV24 (t0, t1, w0, w1, 7);
PACKVS24 (t0, t1, w0, w1, 8); append_0x06_2x4_S (t0, t1, offset.s8); PACKSV24 (t0, t1, w0, w1, 8);
PACKVS24 (t0, t1, w0, w1, 9); append_0x06_2x4_S (t0, t1, offset.s9); PACKSV24 (t0, t1, w0, w1, 9);
PACKVS24 (t0, t1, w0, w1, a); append_0x06_2x4_S (t0, t1, offset.sa); PACKSV24 (t0, t1, w0, w1, a);
PACKVS24 (t0, t1, w0, w1, b); append_0x06_2x4_S (t0, t1, offset.sb); PACKSV24 (t0, t1, w0, w1, b);
PACKVS24 (t0, t1, w0, w1, c); append_0x06_2x4_S (t0, t1, offset.sc); PACKSV24 (t0, t1, w0, w1, c);
PACKVS24 (t0, t1, w0, w1, d); append_0x06_2x4_S (t0, t1, offset.sd); PACKSV24 (t0, t1, w0, w1, d);
PACKVS24 (t0, t1, w0, w1, e); append_0x06_2x4_S (t0, t1, offset.se); PACKSV24 (t0, t1, w0, w1, e);
PACKVS24 (t0, t1, w0, w1, f); append_0x06_2x4_S (t0, t1, offset.sf); PACKSV24 (t0, t1, w0, w1, f);
#endif
}
DECLSPEC void append_0x80_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
{
#if VECT_SIZE == 1

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17300_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17300_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17300_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17300_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17300_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17300_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17300_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,695 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17300_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17300_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17300_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17300_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17300_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17300_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17400_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17400_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17400_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17400_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17400_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17400_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -28,7 +28,7 @@ __constant u64a keccakf_rndc[24] =
#define KECCAK_ROUNDS 24
#endif
__kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
@ -297,15 +297,15 @@ __kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
}
__kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
@ -585,10 +585,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
}
}
__kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17400_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,696 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17400_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17400_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17400_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17400_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17400_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17500_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17500_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17500_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17500_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17500_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17500_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,695 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17500_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17500_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17500_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17500_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17500_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17600_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17600_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x06_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17600_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17600_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17600_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,696 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17600_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17600_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17600_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17700_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17700_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17700_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17700_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17700_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17700_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17700_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,695 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0x8000000000000000;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = 0;
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17700_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17700_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17700_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17700_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17700_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17700_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -30,7 +30,7 @@ __constant u64a keccakf_rndc[24] =
#define KECCAK_ROUNDS 24
#endif
__kernel void m05000_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
@ -240,15 +240,15 @@ __kernel void m05000_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
}
}
__kernel void m05000_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
@ -470,10 +470,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
}
}
__kernel void m05000_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m05000_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17800_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17800_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17800_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17800_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0x8000000000000000;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17800_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17800_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -28,7 +28,7 @@ __constant u64a keccakf_rndc[24] =
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m05000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
DECLSPEC void m17400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -214,7 +214,7 @@ DECLSPEC void m05000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g
}
}
DECLSPEC void m05000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
@ -412,7 +412,7 @@ DECLSPEC void m05000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g
}
}
__kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -456,10 +456,10 @@ __kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -503,10 +503,10 @@ __kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -550,10 +550,10 @@ __kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -597,10 +597,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -644,10 +644,10 @@ __kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
__kernel void m17800_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
@ -691,5 +691,5 @@ __kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
* main
*/
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17900_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17900_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17900_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m17900_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17900_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17900_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m17900_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,695 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0x8000000000000000;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m17900_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17900_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17900_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17900_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17900_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m17900_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -0,0 +1,479 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m18000_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m18000_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
append_0x01_2x4_VV (w0, w1, out_len);
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = 0;
u64x a10 = 0;
u64x a11 = 0;
u64x a12 = 0;
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m18000_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,594 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
__kernel void m18000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
__kernel void m18000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
wordl0[2] = pw_buf0[2];
wordl0[3] = pw_buf0[3];
wordl1[0] = pw_buf1[0];
wordl1[1] = pw_buf1[1];
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
}
else
{
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
w0[2] = wordl0[2] | wordr0[2];
w0[3] = wordl0[3] | wordr0[3];
w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0[0]);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m18000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
__kernel void m18000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}

@ -0,0 +1,695 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
__constant u64a keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
DECLSPEC void m17600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r;
/**
* Keccak
*/
u64x a00 = hl32_to_64 (w0[1], w0lr);
u64x a01 = hl32_to_64 (w0[3], w0[2]);
u64x a02 = hl32_to_64 (w1[1], w1[0]);
u64x a03 = hl32_to_64 (w1[3], w1[2]);
u64x a04 = hl32_to_64 (w2[1], w2[0]);
u64x a10 = hl32_to_64 (w2[3], w2[2]);
u64x a11 = hl32_to_64 (w3[1], w3[0]);
u64x a12 = hl32_to_64 (w3[3], w3[2]);
u64x a13 = 0x8000000000000000;
u64x a14 = 0;
u64x a20 = 0;
u64x a21 = 0;
u64x a22 = 0;
u64x a23 = 0;
u64x a24 = 0;
u64x a30 = 0;
u64x a31 = 0;
u64x a32 = 0;
u64x a33 = 0;
u64x a34 = 0;
u64x a40 = 0;
u64x a41 = 0;
u64x a42 = 0;
u64x a43 = 0;
u64x a44 = 0;
#define Rho_Pi(ad,r) \
bc0 = ad; \
ad = rotl64 (t, r); \
t = bc0; \
#ifdef _unroll
#pragma unroll
#endif
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
{
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
Rho_Pi (a40, 62);
Rho_Pi (a24, 18);
Rho_Pi (a42, 39);
Rho_Pi (a14, 61);
Rho_Pi (a11, 20);
Rho_Pi (a01, 44);
// Chi
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
// Iota
a00 ^= keccakf_rndc[round];
}
// Theta
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
u64x t;
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
// Rho Pi
t = a01;
Rho_Pi (a20, 1);
Rho_Pi (a12, 3);
Rho_Pi (a21, 6);
Rho_Pi (a32, 10);
Rho_Pi (a33, 15);
Rho_Pi (a03, 21);
Rho_Pi (a10, 28);
Rho_Pi (a31, 36);
Rho_Pi (a13, 45);
Rho_Pi (a41, 55);
Rho_Pi (a44, 2);
Rho_Pi (a04, 14);
Rho_Pi (a30, 27);
Rho_Pi (a43, 41);
Rho_Pi (a34, 56);
Rho_Pi (a23, 8);
Rho_Pi (a22, 25);
Rho_Pi (a02, 43);
#undef Rho_Pi
bc0 = a00;
bc2 = a02;
bc3 = a03;
bc4 = a04;
a02 ^= ~bc3 & bc4;
a03 ^= ~bc4 & bc0;
const u32x r0 = l32_from_64 (a03);
const u32x r1 = h32_from_64 (a03);
const u32x r2 = l32_from_64 (a02);
const u32x r3 = h32_from_64 (a02);
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
__kernel void m18000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m18000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m18000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m18000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m18000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
u32 w3[4];
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m18000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
u32 w3[4];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = 0;
w3[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* main
*/
m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}

@ -6,6 +6,20 @@
- Add new option --slow-candidates which allows hashcat to generate passwords on-host
##
## Algorithms
##
- Added hash-mode 17300 = SHA3-224
- Added hash-mode 17400 = SHA3-256
- Added hash-mode 17500 = SHA3-384
- Added hash-mode 17600 = SHA3-512
- Added hash-mode 17700 = Keccak-224
- Added hash-mode 17800 = Keccak-256
- Added hash-mode 17900 = Keccak-384
- Added hash-mode 18000 = Keccak-512
- Removed hash-mode 5000 = SHA-3 (Keccak)
##
## Improvements
##
@ -33,7 +47,6 @@
- Fixed detection of unique ESSID in WPA-PMKID-* parser
- Fixed speed/delay problem when quitting while the outfile folder is being scanned
- Fixed a race condition in status view leading to out-of-bound reads
- Fixed the tokenizer configuration in postgres hash parser
* changes v4.2.0 -> v4.2.1

@ -41,11 +41,18 @@ NVIDIA GPUs require "NVIDIA Driver" (367.x or later)
- MD5
- Half MD5
- SHA1
- SHA-224
- SHA-256
- SHA-384
- SHA-512
- SHA-3 (Keccak)
- SHA2-224
- SHA2-256
- SHA2-384
- SHA2-512
- SHA3-224
- SHA3-256
- SHA3-384
- SHA3-512
- Keccak-224
- Keccak-256
- Keccak-384
- Keccak-512
- BLAKE2b-512
- SipHash
- RIPEMD-160

@ -176,7 +176,7 @@ _hashcat ()
{
local VERSION=4.2.1
local HASH_MODES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 124 130 131 132 133 140 141 150 160 200 300 400 500 501 600 900 1000 1100 1400 1410 1411 1420 1421 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2501 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8800 8900 9000 9100 9200 9300 9400 9500 9600 9700 9710 9720 9800 9810 9820 9900 10000 10100 10200 10300 10400 10410 10420 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11700 11800 11900 12000 12001 12100 12200 12300 12400 12500 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16801 16900"
local HASH_MODES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 124 130 131 132 133 140 141 150 160 200 300 400 500 501 600 900 1000 1100 1400 1410 1411 1420 1421 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2501 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5100 5200 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8800 8900 9000 9100 9200 9300 9400 9500 9600 9700 9710 9720 9800 9810 9820 9900 10000 10100 10200 10300 10400 10410 10420 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11700 11800 11900 12000 12001 12100 12200 12300 12400 12500 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16801 16900 17300 17400 17500 17600 17700 17800 17900 18000"
local ATTACK_MODES="0 1 3 6 7"
local HCCAPX_MESSAGE_PAIRS="0 1 2 3 4 5"
local OUTFILE_FORMATS="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"

@ -1060,7 +1060,6 @@ typedef enum hash_type
HASH_TYPE_ORACLEH = 13,
HASH_TYPE_DESRACF = 14,
HASH_TYPE_BCRYPT = 15,
HASH_TYPE_KECCAK = 16,
HASH_TYPE_NETNTLM = 17,
HASH_TYPE_RIPEMD160 = 18,
HASH_TYPE_WHIRLPOOL = 19,
@ -1186,7 +1185,6 @@ typedef enum kern_type
KERN_TYPE_SHA1_MD5 = 4700,
KERN_TYPE_MD5_CHAP = 4800,
KERN_TYPE_SHA1_SLT_PW_SLT = 4900,
KERN_TYPE_KECCAK = 5000,
KERN_TYPE_MD5H = 5100,
KERN_TYPE_PSAFE3 = 5200,
KERN_TYPE_IKEPSK_MD5 = 5300,
@ -1325,6 +1323,14 @@ typedef enum kern_type
KERN_TYPE_WPA_PMKID_PBKDF2 = 16800,
KERN_TYPE_WPA_PMKID_PMK = 16801,
KERN_TYPE_ANSIBLE_VAULT = 16900,
KERN_TYPE_SHA3_224 = 17300,
KERN_TYPE_SHA3_256 = 17400,
KERN_TYPE_SHA3_384 = 17500,
KERN_TYPE_SHA3_512 = 17600,
KERN_TYPE_KECCAK_224 = 17700,
KERN_TYPE_KECCAK_256 = 17800,
KERN_TYPE_KECCAK_384 = 17900,
KERN_TYPE_KECCAK_512 = 18000,
KERN_TYPE_PLAINTEXT = 99999,
} kern_type_t;
@ -1418,7 +1424,10 @@ int des_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_bu
int episerver_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int postgresql_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int netscreen_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int keccak_224_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int keccak_256_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int keccak_384_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int keccak_512_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int blake2b_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int chacha20_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
int lm_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);

@ -403,6 +403,7 @@ typedef enum opts_type
OPTS_TYPE_AUX4 = (1ULL << 37),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 38),
OPTS_TYPE_PREFERED_THREAD = (1ULL << 39), // some algorithms (complicated ones with many branches) benefit from this
OPTS_TYPE_PT_ADD06 = (1ULL << 40),
} opts_type_t;
@ -724,8 +725,6 @@ typedef struct salt
u32 salt_iter2;
u32 salt_sign[2];
u32 keccak_mdlen;
u32 digests_cnt;
u32 digests_done;

@ -137,7 +137,6 @@ static const char *ST_HASH_04522 = "9038129c474caa3f0de56f38db84033d0fe1d4b8:365
static const char *ST_HASH_04700 = "92d85978d884eb1d99a51652b1139c8279fa8663";
static const char *ST_HASH_04800 = "aa4aaa1d52319525023c06a4873f4c51:35343534373533343633383832343736:dc";
static const char *ST_HASH_04900 = "75d280ca9a0c2ee18729603104ead576d9ca6285:347070";
static const char *ST_HASH_05000 = "203f88777f18bb4ee1226627b547808f38d90d3e106262b5de9ca943b57137b6";
static const char *ST_HASH_05100 = "8743b52063cd8409";
static const char *ST_HASH_05200 = "50575333e4e2a590a5e5c8269f57ec04a8a1c0c03da55b311c51236dab8c6b96b0afca02000800005eaeee20c6cc10d5caa6522b3ca545c41d9133d630ca08f467b7aae8a2bbef51aa2df968d10b9c4cfb17a182c0add7acb8c153794f51337e12f472f451d10e6dcac664ed760606aabdbb6b794a80d6ce2a330100c76de0ff961a45cca21576b893d826c52f272b97cdf48aca6fbe6c74b039f81c61b7d632fb6fddd9f96162ab1effd69a4598a331e855e38792e5365272d4791bf991d248e1585a9ad20ea3d77b5d2ef9a711ef90a70ec6991cb578f1b8bdaa9efa7b0039e9ea96f777491713047bdd99fa1d78f06f23406a66046b387d3034e46b1f84129bba853cc18fa49f107dc0290547258d30566a4b1b363ff4c1c16cb2f5f400059833d4b651bfa508200cbdc7a75fc57ef90eb1d90b0deea8505753332d454f46505753332d454f466236710e2e2477878e738b60d0aa2834a96b01e97764fe980243a06ad16939d1";
static const char *ST_HASH_05300 = "50503326cac6e4bd892b8257805b5a59a285f464ad3f63dc01bd0335f8341ef52e00be0b8cb205422a3788f021e4e6e8ccbe34784bc85abe42f62545bac64888426a2f1264fa28cf384ff00b14cfa5eff562dda4fad2a31fd7a6715218cff959916deed856feea5bee2e773241c5fbebf202958f0ce0c432955e0f1f6d1259da:688a7bfa8d5819630a970ed6d27018021a15fbb3e2fdcc36ce9b563d8ff95f510c4b3236c014d1cde9c2f1a999b121bc3ab1bc8049c8ac1e8c167a84f53c867492723eb01ab4b38074b38f4297d6fea8f44e01ea828fce33c433430938b1551f60673ce8088e7d2f41e3b49315344046fefee1e3860064331417562761db3ba4:c66606d691eaade4:8bdc88a2cdb4a1cf:c3b13137fae9f66684d98709939e5c3454ee31a98c80a1c76427d805b5dea866eff045515e8fb42dd259b9448caba9d937f4b3b75ec1b092a92232b4c8c1e70a60a52076e907f887b731d0f66e19e09b535238169c74c04a4b393f9b815c54eef4558cd8a22c9018bb4f24ee6db0e32979f9a353361cdba948f9027551ee40b1c96ba81c28aa3e1a0fac105dc469efa83f6d3ee281b945c6fa8b4677bac26dda:53f757c5b08afad6:aa02d9289e1702e5d7ed1e4ebf35ab31c2688e00:aab8580015cf545ac0b7291d15a4f2c79e06defd:944a0df3939f3bd281c9d05fbc0e3d30";
@ -281,6 +280,14 @@ static const char *ST_HASH_16700 = "$fvde$1$16$84286044060108438487434858307513$
static const char *ST_HASH_16800 = "2582a8281bf9d4308d6f5731d0e61c61*4604ba734d4e*89acf0e761f4*ed487162465a774bfba60eb603a39f3a";
static const char *ST_HASH_16801 = "2582a8281bf9d4308d6f5731d0e61c61*4604ba734d4e*89acf0e761f4";
static const char *ST_HASH_16900 = "$ansible$0*0*6b761adc6faeb0cc0bf197d3d4a4a7d3f1682e4b169cae8fa6b459b3214ed41e*426d313c5809d4a80a4b9bc7d4823070*d8bad190c7fbc7c3cb1c60a27abfb0ff59d6fb73178681c7454d94a0f56a4360";
static const char *ST_HASH_17300 = "412ef78534ba6ab0e9b1607d3e9767a25c1ea9d5e83176b4c2817a6c";
static const char *ST_HASH_17400 = "d60fcf6585da4e17224f58858970f0ed5ab042c3916b76b0b828e62eaf636cbd";
static const char *ST_HASH_17500 = "983ba28532cc6320d04f20fa485bcedb38bddb666eca5f1e5aa279ff1c6244fe5f83cf4bbf05b95ff378dd2353617221";
static const char *ST_HASH_17600 = "7c2dc1d743735d4e069f3bda85b1b7e9172033dfdd8cd599ca094ef8570f3930c3f2c0b7afc8d6152ce4eaad6057a2ff22e71934b3a3dd0fb55a7fc84a53144e";
static const char *ST_HASH_17700 = "e1dfad9bafeae6ef15f5bbb16cf4c26f09f5f1e7870581962fc84636";
static const char *ST_HASH_17800 = "203f88777f18bb4ee1226627b547808f38d90d3e106262b5de9ca943b57137b6";
static const char *ST_HASH_17900 = "5804b7ada5806ba79540100e9a7ef493654ff2a21d94d4f2ce4bf69abda5d94bf03701fe9525a15dfdc625bfbd769701";
static const char *ST_HASH_18000 = "2fbf5c9080f0a704de2e915ba8fdae6ab00bbc026b2c1c8fa07da1239381c6b7f4dfd399bf9652500da723694a4c719587dd0219cb30eabe61210a8ae4dc0b03";
static const char *ST_HASH_99999 = "hashcat";
static const char *OPTI_STR_OPTIMIZED_KERNEL = "Optimized-Kernel";
@ -368,8 +375,8 @@ static const char *HT_00600 = "BLAKE2b";
static const char *HT_00900 = "MD4";
static const char *HT_01000 = "NTLM";
static const char *HT_01100 = "Domain Cached Credentials (DCC), MS Cache";
static const char *HT_01300 = "SHA-224";
static const char *HT_01400 = "SHA-256";
static const char *HT_01300 = "SHA2-224";
static const char *HT_01400 = "SHA2-256";
static const char *HT_01410 = "sha256($pass.$salt)";
static const char *HT_01420 = "sha256($salt.$pass)";
static const char *HT_01430 = "sha256(utf16le($pass).$salt)";
@ -378,7 +385,7 @@ static const char *HT_01450 = "HMAC-SHA256 (key = $pass)";
static const char *HT_01460 = "HMAC-SHA256 (key = $salt)";
static const char *HT_01500 = "descrypt, DES (Unix), Traditional DES";
static const char *HT_01600 = "Apache $apr1$ MD5, md5apr1, MD5 (APR)";
static const char *HT_01700 = "SHA-512";
static const char *HT_01700 = "SHA2-512";
static const char *HT_01710 = "sha512($pass.$salt)";
static const char *HT_01720 = "sha512($salt.$pass)";
static const char *HT_01730 = "sha512(utf16le($pass).$salt)";
@ -408,7 +415,6 @@ static const char *HT_04520 = "sha1($salt.sha1($pass))";
static const char *HT_04700 = "sha1(md5($pass))";
static const char *HT_04800 = "iSCSI CHAP authentication, MD5(CHAP)";
static const char *HT_04900 = "sha1($salt.$pass.$salt)";
static const char *HT_05000 = "SHA-3 (Keccak)";
static const char *HT_05100 = "Half MD5";
static const char *HT_05200 = "Password Safe v3";
static const char *HT_05300 = "IKE-PSK MD5";
@ -471,7 +477,7 @@ static const char *HT_10420 = "PDF 1.1 - 1.3 (Acrobat 2 - 4), collider #2";
static const char *HT_10500 = "PDF 1.4 - 1.6 (Acrobat 5 - 8)";
static const char *HT_10600 = "PDF 1.7 Level 3 (Acrobat 9)";
static const char *HT_10700 = "PDF 1.7 Level 8 (Acrobat 10 - 11)";
static const char *HT_10800 = "SHA-384";
static const char *HT_10800 = "SHA2-384";
static const char *HT_10900 = "PBKDF2-HMAC-SHA256";
static const char *HT_11000 = "PrestaShop";
static const char *HT_11100 = "PostgreSQL CRAM (MD5)";
@ -529,6 +535,14 @@ static const char *HT_16700 = "FileVault 2";
static const char *HT_16800 = "WPA-PMKID-PBKDF2";
static const char *HT_16801 = "WPA-PMKID-PMK";
static const char *HT_16900 = "Ansible Vault";
static const char *HT_17300 = "SHA3-224";
static const char *HT_17400 = "SHA3-256";
static const char *HT_17500 = "SHA3-384";
static const char *HT_17600 = "SHA3-512";
static const char *HT_17700 = "Keccak-224";
static const char *HT_17800 = "Keccak-256";
static const char *HT_17900 = "Keccak-384";
static const char *HT_18000 = "Keccak-512";
static const char *HT_99999 = "Plaintext";
static const char *HT_00011 = "Joomla < 2.5.18";
@ -4870,7 +4884,7 @@ int postgresql_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE
token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH
| TOKEN_ATTR_VERIFY_HEX;
token.len_min[1] = 0;
token.len_min[1] = 32;
token.len_max[1] = 32;
token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH;
@ -6152,18 +6166,79 @@ int sha512crypt_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYB
return (PARSER_OK);
}
int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
int keccak_224_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
u32 *digest = (u32 *) hash_buf->digest;
token_t token;
token.token_cnt = 1;
token.len_min[0] = 56;
token.len_max[0] = 56;
token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
| TOKEN_ATTR_VERIFY_HEX;
const int rc_tokenizer = input_tokenizer (input_buf, input_len, &token);
if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
u8 *hash_pos = token.buf[0];
int hash_len = token.len[0];
if (hash_len != 56) return (PARSER_GLOBAL_LENGTH);
digest[0] = hex_to_u32 (hash_pos + 0);
digest[1] = hex_to_u32 (hash_pos + 8);
digest[2] = hex_to_u32 (hash_pos + 16);
digest[3] = hex_to_u32 (hash_pos + 24);
digest[4] = hex_to_u32 (hash_pos + 32);
digest[5] = hex_to_u32 (hash_pos + 40);
digest[6] = hex_to_u32 (hash_pos + 48);
return (PARSER_OK);
}
int keccak_256_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
u64 *digest = (u64 *) hash_buf->digest;
salt_t *salt = hash_buf->salt;
token_t token;
token.token_cnt = 1;
token.len_min[0] = 64;
token.len_max[0] = 64;
token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
| TOKEN_ATTR_VERIFY_HEX;
const int rc_tokenizer = input_tokenizer (input_buf, input_len, &token);
if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
u8 *hash_pos = token.buf[0];
int hash_len = token.len[0];
if (hash_len != 64) return (PARSER_GLOBAL_LENGTH);
digest[0] = hex_to_u64 (hash_pos + 0);
digest[1] = hex_to_u64 (hash_pos + 16);
digest[2] = hex_to_u64 (hash_pos + 32);
digest[3] = hex_to_u64 (hash_pos + 48);
return (PARSER_OK);
}
int keccak_384_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
u64 *digest = (u64 *) hash_buf->digest;
token_t token;
token.token_cnt = 1;
token.len_min[0] = 16;
token.len_max[0] = 400;
token.len_min[0] = 96;
token.len_max[0] = 96;
token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
| TOKEN_ATTR_VERIFY_HEX;
@ -6174,16 +6249,48 @@ int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNU
u8 *hash_pos = token.buf[0];
int hash_len = token.len[0];
if (hash_len % 16) return (PARSER_GLOBAL_LENGTH);
if (hash_len != 96) return (PARSER_GLOBAL_LENGTH);
u32 keccak_mdlen = hash_len / 2;
digest[0] = hex_to_u64 (hash_pos + 0);
digest[1] = hex_to_u64 (hash_pos + 16);
digest[2] = hex_to_u64 (hash_pos + 32);
digest[3] = hex_to_u64 (hash_pos + 48);
digest[4] = hex_to_u64 (hash_pos + 64);
digest[5] = hex_to_u64 (hash_pos + 80);
for (u32 i = 0, j = 0; i < keccak_mdlen / 8; i += 1, j += 16)
{
digest[i] = hex_to_u64 (hash_pos + j);
}
return (PARSER_OK);
}
salt->keccak_mdlen = keccak_mdlen;
int keccak_512_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
u64 *digest = (u64 *) hash_buf->digest;
token_t token;
token.token_cnt = 1;
token.len_min[0] = 128;
token.len_max[0] = 128;
token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
| TOKEN_ATTR_VERIFY_HEX;
const int rc_tokenizer = input_tokenizer (input_buf, input_len, &token);
if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
u8 *hash_pos = token.buf[0];
int hash_len = token.len[0];
if (hash_len != 128) return (PARSER_GLOBAL_LENGTH);
digest[0] = hex_to_u64 (hash_pos + 0);
digest[1] = hex_to_u64 (hash_pos + 16);
digest[2] = hex_to_u64 (hash_pos + 32);
digest[3] = hex_to_u64 (hash_pos + 48);
digest[4] = hex_to_u64 (hash_pos + 64);
digest[5] = hex_to_u64 (hash_pos + 80);
digest[6] = hex_to_u64 (hash_pos + 96);
digest[7] = hex_to_u64 (hash_pos + 112);
return (PARSER_OK);
}
@ -17735,20 +17842,6 @@ int wpa_pmkid_pmk_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MA
const int rc_tokenizer2 = input_tokenizer (input_buf, input_len, &token);
if (rc_tokenizer2 != PARSER_OK) return (rc_tokenizer);
// essid
u8 *essid_buf = token.buf[3];
int essid_len = token.len[3];
u8 *essid_ptr = (u8 *) wpa_pmkid->essid_buf;
for (int i = 0, j = 0; i < essid_len; i += 2, j += 1)
{
essid_ptr[j] = hex_to_u8 (essid_buf + i);
}
wpa_pmkid->essid_len = essid_len / 2;
}
// pmkid
@ -18249,7 +18342,6 @@ const char *strhashtype (const u32 hash_mode)
case 4700: return HT_04700;
case 4800: return HT_04800;
case 4900: return HT_04900;
case 5000: return HT_05000;
case 5100: return HT_05100;
case 5200: return HT_05200;
case 5300: return HT_05300;
@ -18401,6 +18493,14 @@ const char *strhashtype (const u32 hash_mode)
case 16800: return HT_16800;
case 16801: return HT_16801;
case 16900: return HT_16900;
case 17300: return HT_17300;
case 17400: return HT_17400;
case 17500: return HT_17500;
case 17600: return HT_17600;
case 17700: return HT_17700;
case 17800: return HT_17800;
case 17900: return HT_17900;
case 18000: return HT_18000;
case 99999: return HT_99999;
}
@ -22061,53 +22161,23 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
wpa_pmkid_t *wpa_pmkid = &wpa_pmkids[digest_cur];
if (wpa_pmkid->essid_len)
{
exec_hexify ((const u8*) wpa_pmkid->essid_buf, wpa_pmkid->essid_len, (u8 *) tmp_buf);
int tmp_len = wpa_pmkid->essid_len * 2;
tmp_buf[tmp_len] = 0;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x*%02x%02x%02x%02x%02x%02x*%02x%02x%02x%02x%02x%02x:%s",
wpa_pmkid->pmkid[0],
wpa_pmkid->pmkid[1],
wpa_pmkid->pmkid[2],
wpa_pmkid->pmkid[3],
wpa_pmkid->orig_mac_ap[0],
wpa_pmkid->orig_mac_ap[1],
wpa_pmkid->orig_mac_ap[2],
wpa_pmkid->orig_mac_ap[3],
wpa_pmkid->orig_mac_ap[4],
wpa_pmkid->orig_mac_ap[5],
wpa_pmkid->orig_mac_sta[0],
wpa_pmkid->orig_mac_sta[1],
wpa_pmkid->orig_mac_sta[2],
wpa_pmkid->orig_mac_sta[3],
wpa_pmkid->orig_mac_sta[4],
wpa_pmkid->orig_mac_sta[5],
tmp_buf);
}
else
{
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x*%02x%02x%02x%02x%02x%02x*%02x%02x%02x%02x%02x%02x",
wpa_pmkid->pmkid[0],
wpa_pmkid->pmkid[1],
wpa_pmkid->pmkid[2],
wpa_pmkid->pmkid[3],
wpa_pmkid->orig_mac_ap[0],
wpa_pmkid->orig_mac_ap[1],
wpa_pmkid->orig_mac_ap[2],
wpa_pmkid->orig_mac_ap[3],
wpa_pmkid->orig_mac_ap[4],
wpa_pmkid->orig_mac_ap[5],
wpa_pmkid->orig_mac_sta[0],
wpa_pmkid->orig_mac_sta[1],
wpa_pmkid->orig_mac_sta[2],
wpa_pmkid->orig_mac_sta[3],
wpa_pmkid->orig_mac_sta[4],
wpa_pmkid->orig_mac_sta[5]);
}
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x*%02x%02x%02x%02x%02x%02x*%02x%02x%02x%02x%02x%02x",
wpa_pmkid->pmkid[0],
wpa_pmkid->pmkid[1],
wpa_pmkid->pmkid[2],
wpa_pmkid->pmkid[3],
wpa_pmkid->orig_mac_ap[0],
wpa_pmkid->orig_mac_ap[1],
wpa_pmkid->orig_mac_ap[2],
wpa_pmkid->orig_mac_ap[3],
wpa_pmkid->orig_mac_ap[4],
wpa_pmkid->orig_mac_ap[5],
wpa_pmkid->orig_mac_sta[0],
wpa_pmkid->orig_mac_sta[1],
wpa_pmkid->orig_mac_sta[2],
wpa_pmkid->orig_mac_sta[3],
wpa_pmkid->orig_mac_sta[4],
wpa_pmkid->orig_mac_sta[5]);
}
else if (hash_mode == 16900)
{
@ -22146,6 +22216,56 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
byte_swap_32 (digest_buf[6]),
byte_swap_32 (digest_buf[7]));
}
else if (hash_mode == 17300 || hash_mode == 17700)
{
u32 *ptr = digest_buf;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x",
ptr[1], ptr[0],
ptr[3], ptr[2],
ptr[5], ptr[4],
ptr[7]
);
}
else if (hash_mode == 17400 || hash_mode == 17800)
{
u32 *ptr = digest_buf;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x",
ptr[1], ptr[0],
ptr[3], ptr[2],
ptr[5], ptr[4],
ptr[7], ptr[6]
);
}
else if (hash_mode == 17500 || hash_mode == 17900)
{
u32 *ptr = digest_buf;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
ptr[ 1], ptr[ 0],
ptr[ 3], ptr[ 2],
ptr[ 5], ptr[ 4],
ptr[ 7], ptr[ 6],
ptr[ 9], ptr[ 8],
ptr[11], ptr[10]
);
}
else if (hash_mode == 17600 || hash_mode == 18000)
{
u32 *ptr = digest_buf;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
ptr[ 1], ptr[ 0],
ptr[ 3], ptr[ 2],
ptr[ 5], ptr[ 4],
ptr[ 7], ptr[ 6],
ptr[ 9], ptr[ 8],
ptr[11], ptr[10],
ptr[13], ptr[12],
ptr[15], ptr[14]
);
}
else if (hash_mode == 99999)
{
char *ptr = (char *) digest_buf;
@ -22249,40 +22369,6 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
snprintf (out_buf, out_len - 1, "%s$%s", (char *) salt.salt_sign, tmp_buf);
}
else if (hash_type == HASH_TYPE_KECCAK)
{
u32 *ptr = digest_buf;
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
ptr[ 1], ptr[ 0],
ptr[ 3], ptr[ 2],
ptr[ 5], ptr[ 4],
ptr[ 7], ptr[ 6],
ptr[ 9], ptr[ 8],
ptr[11], ptr[10],
ptr[13], ptr[12],
ptr[15], ptr[14],
ptr[17], ptr[16],
ptr[19], ptr[18],
ptr[21], ptr[20],
ptr[23], ptr[22],
ptr[25], ptr[24],
ptr[27], ptr[26],
ptr[29], ptr[28],
ptr[31], ptr[30],
ptr[33], ptr[32],
ptr[35], ptr[34],
ptr[37], ptr[36],
ptr[39], ptr[38],
ptr[41], ptr[30],
ptr[43], ptr[42],
ptr[45], ptr[44],
ptr[47], ptr[46],
ptr[49], ptr[48]
);
out_buf[salt.keccak_mdlen * 2] = 0;
}
else if (hash_type == HASH_TYPE_BLAKE2B)
{
u32 *ptr = digest_buf;
@ -24547,25 +24633,6 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 5000: hashconfig->hash_type = HASH_TYPE_KECCAK;
hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD01;
hashconfig->kern_type = KERN_TYPE_KECCAK;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_05000;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 5100: hashconfig->hash_type = HASH_TYPE_MD5H;
hashconfig->salt_type = SALT_TYPE_NONE;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
@ -27343,6 +27410,150 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17300: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD06;
hashconfig->kern_type = KERN_TYPE_SHA3_224;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_224_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17300;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17400: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD06;
hashconfig->kern_type = KERN_TYPE_SHA3_256;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_256_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17400;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17500: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD06;
hashconfig->kern_type = KERN_TYPE_SHA3_384;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_384_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17500;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17600: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD06;
hashconfig->kern_type = KERN_TYPE_SHA3_512;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_512_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17600;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17700: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD01;
hashconfig->kern_type = KERN_TYPE_KECCAK_224;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_224_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17700;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17800: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD01;
hashconfig->kern_type = KERN_TYPE_KECCAK_256;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_256_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17800;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 17900: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD01;
hashconfig->kern_type = KERN_TYPE_KECCAK_384;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_384_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_17900;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 18000: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_ADD01;
hashconfig->kern_type = KERN_TYPE_KECCAK_512;
hashconfig->dgst_size = DGST_SIZE_8_25;
hashconfig->parse_func = keccak_512_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_RAW_HASH;
hashconfig->dgst_pos0 = 6;
hashconfig->dgst_pos1 = 7;
hashconfig->dgst_pos2 = 4;
hashconfig->dgst_pos3 = 5;
hashconfig->st_hash = ST_HASH_18000;
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
break;
case 99999: hashconfig->hash_type = HASH_TYPE_PLAINTEXT;
hashconfig->salt_type = SALT_TYPE_NONE;
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
@ -27444,6 +27655,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->opts_type &= ~OPTS_TYPE_PT_UTF16BE;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD01;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD02;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD06;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD80;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADDBITS14;
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADDBITS15;
@ -28333,8 +28545,6 @@ void hashconfig_benchmark_defaults (hashcat_ctx_t *hashcat_ctx, salt_t *salt, vo
break;
case 3100: salt->salt_len = 1;
break;
case 5000: salt->keccak_mdlen = 32;
break;
case 5800: salt->salt_len = 16;
break;
case 6800: salt->salt_len = 32;

@ -35,6 +35,7 @@ static const char *drm_card0_driver_path = "/sys/class/drm/card0/device/driver";
#endif
static const u32 full01 = 0x01010101;
static const u32 full06 = 0x06060606;
static const u32 full80 = 0x80808080;
static double TARGET_MSEC_PROFILE[4] = { 2, 12, 96, 480 };
@ -2153,6 +2154,10 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x06);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
@ -2165,6 +2170,10 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x06);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
@ -2472,6 +2481,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
ptr[line_len] = 0x80;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
ptr[line_len] = 0x06;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
{
ptr[line_len] = 0x01;
@ -2590,6 +2604,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
ptr[line_len] = 0x80;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
ptr[line_len] = 0x06;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
{
ptr[line_len] = 0x01;
@ -6313,6 +6332,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_mp_buf32[7] = 0;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
@ -6337,6 +6357,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_mp_l_buf32[8] = 0;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;

@ -126,6 +126,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
comb_ptr[comb.pw_len] = 0x01;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
comb_ptr[comb.pw_len] = 0x06;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
comb_ptr[comb.pw_len] = 0x80;
@ -264,6 +269,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
pw_ptr[new_pass_len] = 0x01;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
{
pw_ptr[new_pass_len] = 0x06;
}
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
pw_ptr[new_pass_len] = 0x80;

@ -125,11 +125,18 @@ static const char *const USAGE_BIG[] =
" 0 | MD5 | Raw Hash",
" 5100 | Half MD5 | Raw Hash",
" 100 | SHA1 | Raw Hash",
" 1300 | SHA-224 | Raw Hash",
" 1400 | SHA-256 | Raw Hash",
" 10800 | SHA-384 | Raw Hash",
" 1700 | SHA-512 | Raw Hash",
" 5000 | SHA-3 (Keccak) | Raw Hash",
" 1300 | SHA2-224 | Raw Hash",
" 1400 | SHA2-256 | Raw Hash",
" 10800 | SHA2-384 | Raw Hash",
" 1700 | SHA2-512 | Raw Hash",
" 17300 | SHA3-224 | Raw Hash",
" 17400 | SHA3-256 | Raw Hash",
" 17500 | SHA3-384 | Raw Hash",
" 17600 | SHA3-512 | Raw Hash",
" 17700 | Keccak-224 | Raw Hash",
" 17800 | Keccak-256 | Raw Hash",
" 17900 | Keccak-384 | Raw Hash",
" 18000 | Keccak-512 | Raw Hash",
" 600 | BLAKE2b-512 | Raw Hash",
" 10100 | SipHash | Raw Hash",
" 6000 | RIPEMD-160 | Raw Hash",

@ -7,7 +7,7 @@
## install help:
##
## cpan install Authen::Passphrase::LANManager Authen::Passphrase::MySQL323 Authen::Passphrase::NTHash Authen::Passphrase::PHPass Crypt::CBC Crypt::DES Crypt::Digest::RIPEMD160 Crypt::Digest::Whirlpool Crypt::ECB Crypt::Eksblowfish::Bcrypt Crypt::Mode::ECB Crypt::MySQL Crypt::OpenSSH::ChachaPoly Crypt::PBKDF2 Crypt::RC4 Crypt::Rijndael Crypt::ScryptKDF Crypt::Skip32 Crypt::Twofish Crypt::UnixCrypt_XS Digest::BLAKE2 Digest::CMAC Digest::CRC Digest::GOST Digest::HMAC Digest::HMAC_MD5 Digest::Keccak Digest::MD4 Digest::MD5 Digest::Perl::MD5 Digest::SHA Digest::SipHash JSON Net::DNS::RR::NSEC3 Net::DNS::SEC Convert::EBCDIC
## cpan install Authen::Passphrase::LANManager Authen::Passphrase::MySQL323 Authen::Passphrase::NTHash Authen::Passphrase::PHPass Crypt::CBC Crypt::DES Crypt::Digest::RIPEMD160 Crypt::Digest::Whirlpool Crypt::ECB Crypt::Eksblowfish::Bcrypt Crypt::Mode::ECB Crypt::MySQL Crypt::OpenSSH::ChachaPoly Crypt::PBKDF2 Crypt::RC4 Crypt::Rijndael Crypt::ScryptKDF Crypt::Skip32 Crypt::Twofish Crypt::UnixCrypt_XS Digest::BLAKE2 Digest::CMAC Digest::CRC Digest::GOST Digest::HMAC Digest::HMAC_MD5 Digest::Keccak Digest::MD4 Digest::MD5 Digest::Perl::MD5 Digest::SHA Digest::SHA3 Digest::SipHash JSON Net::DNS::RR::NSEC3 Net::DNS::SEC Convert::EBCDIC
##
use strict;
@ -15,8 +15,9 @@ use warnings;
use Digest::MD4 qw (md4 md4_hex);
use Digest::MD5 qw (md5 md5_hex);
use Digest::SHA qw (sha1 sha256 sha384 sha512 sha1_hex sha224_hex sha256_hex sha384_hex sha512_hex hmac_sha1 hmac_sha256 hmac_sha512);
use Digest::SHA3 qw (sha3_224_hex sha3_256_hex sha3_384_hex sha3_512_hex);
use Digest::Keccak qw (keccak_224_hex keccak_256_hex keccak_384_hex keccak_512_hex);
use Digest::HMAC qw (hmac hmac_hex);
use Digest::Keccak qw (keccak_256_hex);
use Digest::BLAKE2 qw (blake2b_hex);
use Crypt::MySQL qw (password41);
use Digest::GOST qw (gost gost_hex);
@ -57,7 +58,7 @@ my $hashcat = "./hashcat";
my $MAX_LEN = 55;
my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 600, 900, 1000, 1100, 1300, 1400, 1410, 1411, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4520, 4521, 4522, 4600, 4700, 4800, 4900, 5000, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7700, 7701, 7800, 7801, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12001, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100, 14400, 14700, 14800, 14900, 15000, 15100, 15200, 15300, 15400, 15500, 15600, 15700, 15900, 16000, 16100, 16200, 16300, 16400, 16500, 16600, 16700, 16800, 16900, 99999);
my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 600, 900, 1000, 1100, 1300, 1400, 1410, 1411, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4520, 4521, 4522, 4600, 4700, 4800, 4900, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7700, 7701, 7800, 7801, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12001, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100, 14400, 14700, 14800, 14900, 15000, 15100, 15200, 15300, 15400, 15500, 15600, 15700, 15900, 16000, 16100, 16200, 16300, 16400, 16500, 16600, 16700, 16800, 16900, 17300, 17400, 17500, 17600, 17700, 17800, 17900, 18000, 99999);
my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
my %less_fifteen = map { $_ => 1 } qw (500 1600 1800 3200 6300 7400 10500 10700);
@ -225,7 +226,7 @@ sub verify
# remember always do "exists ($db->{$hash_in})" checks as soon as possible and don't forget it
# unsalted
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3000 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 5700 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 8600 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 16000 || $mode == 16400 || $mode == 99999)
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3000 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 5700 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 8600 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 16000 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
{
my $index = index ($line, ":");
@ -3546,7 +3547,7 @@ sub passthrough
my $tmp_hash;
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 5700 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 99999)
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 5700 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
{
$tmp_hash = gen_hash ($mode, $word_buf, "");
}
@ -4076,7 +4077,7 @@ sub single
{
my $mode = $modes[$j];
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 5300 || $mode == 5400 || $mode == 6000 || $mode == 6100 || $mode == 6600 || $mode == 6900 || $mode == 5700 || $mode == 8200 || $mode == 8300 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 99999)
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 5300 || $mode == 5400 || $mode == 6000 || $mode == 6100 || $mode == 6600 || $mode == 6900 || $mode == 5700 || $mode == 8200 || $mode == 8300 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
{
for (my $i = 1; $i < 32; $i++)
{
@ -6077,12 +6078,6 @@ sub gen_hash
$tmp_hash = sprintf ("%s:%s", $hash_buf, $salt_buf);
}
elsif ($mode == 5000)
{
$hash_buf = keccak_256_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 5100)
{
my $pos;
@ -9928,6 +9923,54 @@ END_CODE
$tmp_hash = sprintf ("%s*%s*%s*%s", substr ($pmkid, 0, 32), $macap, $macsta, $essid);
}
elsif ($mode == 17300)
{
$hash_buf = sha3_224_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17400)
{
$hash_buf = sha3_256_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17500)
{
$hash_buf = sha3_384_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17600)
{
$hash_buf = sha3_512_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17700)
{
$hash_buf = keccak_224_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17800)
{
$hash_buf = keccak_256_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 17900)
{
$hash_buf = keccak_384_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 18000)
{
$hash_buf = keccak_512_hex ($word_buf);
$tmp_hash = sprintf ("%s", $hash_buf);
}
elsif ($mode == 99999)
{
$tmp_hash = sprintf ("%s", $word_buf);

@ -9,7 +9,7 @@ TDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# missing hash types: 5200,6251,6261,6271,6281
HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 600 900 1000 1100 1300 1400 1410 1411 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5000 5100 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7701 7800 7801 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12001 12100 12200 12300 12400 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14400 14600 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16900 99999"
HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 600 900 1000 1100 1300 1400 1410 1411 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5100 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7701 7800 7801 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12001 12100 12200 12300 12400 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14400 14600 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16900 17300 17400 17500 17600 17700 17800 17900 18000 99999"
#ATTACK_MODES="0 1 3 6 7"
ATTACK_MODES="0 1 3 7"

Loading…
Cancel
Save