mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-10 07:40:58 +00:00
Resolve conflicts
This commit is contained in:
commit
3c3b05d1e5
.gitmodules
OpenCL
inc_common.cl
m17300_a0-optimized.cl
m17300_a1-optimized.cl
m17300_a3-optimized.cl
m17400_a0-optimized.cl
m17400_a1-optimized.cl
m17400_a3-optimized.cl
m17500_a0-optimized.cl
m17500_a1-optimized.cl
m17500_a3-optimized.cl
m17600_a0-optimized.cl
m17600_a1-optimized.cl
m17600_a3-optimized.cl
m17700_a0-optimized.cl
m17700_a1-optimized.cl
m17700_a3-optimized.cl
m17800_a0-optimized.cl
m17800_a1-optimized.cl
m17800_a3-optimized.cl
m17900_a0-optimized.cl
m17900_a1-optimized.cl
m17900_a3-optimized.cl
m18000_a0-optimized.cl
m18000_a1-optimized.cl
m18000_a3-optimized.cl
deps
docs
extra/tab_completion
include
src
tools
4
.gitmodules
vendored
4
.gitmodules
vendored
@ -1,3 +1,3 @@
|
||||
[submodule "deps/OpenCL-Headers/CL"]
|
||||
path = deps/OpenCL-Headers/CL
|
||||
[submodule "OpenCL-Headers"]
|
||||
path = deps/git/OpenCL-Headers
|
||||
url = https://github.com/KhronosGroup/OpenCL-Headers.git
|
||||
|
@ -30952,6 +30952,18 @@ DECLSPEC void append_0x01_2x4_S (u32 *w0, u32 *w1, const u32 offset)
|
||||
append_helper_1x4_S (w1, ((offset16 == 1) ? 0x01010101 : 0), v);
|
||||
}
|
||||
|
||||
/**
 * Scalar variant: applies the byte pattern 0x06 to the two 4-word buffers
 * w0 and w1 at byte position `offset`.
 *
 * A mark is built once from `offset` via set_mark_1x4_S. The buffer selected
 * by offset / 16 receives the 0x06060606 pattern, the other one receives 0.
 * NOTE(review): the exact masking done by set_mark_1x4_S and
 * append_helper_1x4_S is defined elsewhere; presumably only the byte at
 * `offset` is affected — confirm.
 */
DECLSPEC void append_0x06_2x4_S (u32 *w0, u32 *w1, const u32 offset)
{
  u32 mark[4];

  set_mark_1x4_S (mark, offset);

  // which of the two 16-byte buffers `offset` falls into
  const u32 block = offset / 16;

  const u32 fill0 = (block == 0) ? 0x06060606 : 0;
  const u32 fill1 = (block == 1) ? 0x06060606 : 0;

  append_helper_1x4_S (w0, fill0, mark);
  append_helper_1x4_S (w1, fill1, mark);
}
|
||||
|
||||
DECLSPEC void append_0x80_1x4_S (u32 *w0, const u32 offset)
|
||||
{
|
||||
u32 v[4];
|
||||
@ -60122,6 +60134,64 @@ DECLSPEC void append_0x01_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Vector variant of append_0x06_2x4_S with a per-lane offset.
 *
 * With VECT_SIZE == 1 the call is forwarded unchanged. For VECT_SIZE 2, 4, 8
 * and 16 every lane is handled on its own: PACKVS24 fills the scalar scratch
 * buffers t0/t1 from w0/w1, the scalar routine runs with that lane's offset,
 * and PACKSV24 writes the scratch buffers back.
 * NOTE(review): the PACK* macros are defined elsewhere; their direction
 * (vector -> scalar, scalar -> vector) is assumed from how they are used here.
 */
DECLSPEC void append_0x06_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x06_2x4_S (w0, w1, offset);

  #else

  u32 t0[4];
  u32 t1[4];

  // unpack lane n, run the scalar routine on it, pack the result back
  #define APPEND_0X06_LANE(n)                  \
    PACKVS24 (t0, t1, w0, w1, n);              \
    append_0x06_2x4_S (t0, t1, offset.s##n);   \
    PACKSV24 (t0, t1, w0, w1, n);

  #if VECT_SIZE == 2

  APPEND_0X06_LANE (0)
  APPEND_0X06_LANE (1)

  #elif VECT_SIZE == 4

  APPEND_0X06_LANE (0)
  APPEND_0X06_LANE (1)
  APPEND_0X06_LANE (2)
  APPEND_0X06_LANE (3)

  #elif VECT_SIZE == 8

  APPEND_0X06_LANE (0)
  APPEND_0X06_LANE (1)
  APPEND_0X06_LANE (2)
  APPEND_0X06_LANE (3)
  APPEND_0X06_LANE (4)
  APPEND_0X06_LANE (5)
  APPEND_0X06_LANE (6)
  APPEND_0X06_LANE (7)

  #elif VECT_SIZE == 16

  APPEND_0X06_LANE (0)
  APPEND_0X06_LANE (1)
  APPEND_0X06_LANE (2)
  APPEND_0X06_LANE (3)
  APPEND_0X06_LANE (4)
  APPEND_0X06_LANE (5)
  APPEND_0X06_LANE (6)
  APPEND_0X06_LANE (7)
  APPEND_0X06_LANE (8)
  APPEND_0X06_LANE (9)
  APPEND_0X06_LANE (a)
  APPEND_0X06_LANE (b)
  APPEND_0X06_LANE (c)
  APPEND_0X06_LANE (d)
  APPEND_0X06_LANE (e)
  APPEND_0X06_LANE (f)

  #endif

  #undef APPEND_0X06_LANE

  #endif
}
|
||||
|
||||
DECLSPEC void append_0x80_2x4_VV (u32x *w0, u32x *w1, const u32x offset)
|
||||
{
|
||||
#if VECT_SIZE == 1
|
||||
|
479
OpenCL/m17300_a0-optimized.cl
Normal file
479
OpenCL/m17300_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak round constants, one 64-bit value per round. The kernels in this
 * file XOR entry [round] into lane a00 at the end of each looped round.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // round  0
  0x0000000000008082, // round  1
  0x800000000000808a, // round  2
  0x8000000080008000, // round  3
  0x000000000000808b, // round  4
  0x0000000080000001, // round  5
  0x8000000080008081, // round  6
  0x8000000000008009, // round  7
  0x000000000000008a, // round  8
  0x0000000000000088, // round  9
  0x0000000080008009, // round 10
  0x000000008000000a, // round 11
  0x000000008000808b, // round 12
  0x800000000000008b, // round 13
  0x8000000000008089, // round 14
  0x8000000000008003, // round 15
  0x8000000000008002, // round 16
  0x8000000000000080, // round 17
  0x000000000000800a, // round 18
  0x800000008000000a, // round 19
  0x8000000080008081, // round 20
  0x8000000000008080, // round 21
  0x0000000080000001, // round 22
  0x8000000080008008  // round 23
};

// number of permutation rounds; may be overridden before this point
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
__kernel void m17300_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32x w0[4] = { 0 };
|
||||
u32x w1[4] = { 0 };
|
||||
u32x w2[4] = { 0 };
|
||||
u32x w3[4] = { 0 };
|
||||
|
||||
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
append_0x06_2x4_VV (w0, w1, out_len);
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = 0;
|
||||
u64x a10 = 0;
|
||||
u64x a11 = 0;
|
||||
u64x a12 = 0;
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0x8000000000000000;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = 0;
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m17300_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17300_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17300_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32x w0[4] = { 0 };
|
||||
u32x w1[4] = { 0 };
|
||||
u32x w2[4] = { 0 };
|
||||
u32x w3[4] = { 0 };
|
||||
|
||||
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
append_0x06_2x4_VV (w0, w1, out_len);
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = 0;
|
||||
u64x a10 = 0;
|
||||
u64x a11 = 0;
|
||||
u64x a12 = 0;
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0x8000000000000000;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = 0;
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m17300_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17300_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m17300_a1-optimized.cl
Normal file
594
OpenCL/m17300_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak round constants, one 64-bit value per round. The kernel in this
 * file XORs entry [round] into lane a00 at the end of each looped round.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // round  0
  0x0000000000008082, // round  1
  0x800000000000808a, // round  2
  0x8000000080008000, // round  3
  0x000000000000808b, // round  4
  0x0000000080000001, // round  5
  0x8000000080008081, // round  6
  0x8000000000008009, // round  7
  0x000000000000008a, // round  8
  0x0000000000000088, // round  9
  0x0000000080008009, // round 10
  0x000000008000000a, // round 11
  0x000000008000808b, // round 12
  0x800000000000008b, // round 13
  0x8000000000008089, // round 14
  0x8000000000008003, // round 15
  0x8000000000008002, // round 16
  0x8000000000000080, // round 17
  0x000000000000800a, // round 18
  0x800000008000000a, // round 19
  0x8000000080008081, // round 20
  0x8000000000008080, // round 21
  0x0000000080000001, // round 22
  0x8000000080008008  // round 23
};

// number of permutation rounds; may be overridden before this point
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
__kernel void m17300_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0x8000000000000000;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = 0;
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * m17300_m08 - kernel entry point with an empty body.
 *
 * Launching this kernel performs no work: none of the arguments is read
 * or written.
 *
 * NOTE(review): presumably a placeholder so that the host can resolve the
 * symbol for this password-length class - confirm against the host code.
 */
__kernel void m17300_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
/**
 * m17300_m16 - kernel entry point with an empty body.
 *
 * Launching this kernel performs no work: none of the arguments is read
 * or written.
 *
 * NOTE(review): presumably a placeholder so that the host can resolve the
 * symbol for this password-length class - confirm against the host code.
 */
__kernel void m17300_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
/**
 * m17300_s04 - combinator kernel, single-digest variant.
 *
 * Each work-item loads one left-hand candidate (eight 32-bit words and its
 * length) from pws[gid]. For every right-hand candidate in combs_buf
 * (il_cnt entries, stepped VECT_SIZE at a time) it shifts one of the two
 * words by the length of the other, ORs them into a 64-byte message, runs
 * the Keccak rounds over a 25-lane state seeded with that message and hands
 * four 32-bit words of the result to COMPARE_S_SIMD together with the
 * digest read from digests_buf[digests_offset].
 *
 * combs_mode selects which side is shifted: with COMBINATOR_MODE_BASE_LEFT
 * the right word is moved behind the left one, otherwise the left word is
 * moved behind the right one.
 *
 * NOTE(review): nothing in this function appends a padding byte after the
 * candidate; presumably the host has already placed it in the candidate
 * buffers - confirm against the caller.
 */
__kernel void m17300_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  // Not referenced by the statements below; kept because the comparison
  // macro is defined elsewhere and may rely on names in this scope.
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // Work-items beyond the number of loaded candidates have nothing to do.
  if (gid >= gid_max) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * digest
   */

  // The name "search" is not used directly below; it is presumably picked
  // up by COMPARE_S_SIMD - confirm against the macro definition.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    // Combined length; not referenced by the statements below.
    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // Shift whichever word comes second by the length of the first one so
    // that the OR below yields the concatenation.
    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * Keccak
     */

    // The first eight lanes take the 64-byte message, the rest start at
    // zero except a32, which has only its top bit set (presumably the
    // closing padding bit of the rate block - confirm).
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One link of the Rho/Pi chain: the lane "ad" receives the rotated
    // value carried in t, and its previous content becomes the new carry.
    // Uses bc0 and t from the enclosing scope. Wrapped in do/while so the
    // three statements always expand as a single statement.
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    // All rounds but the last are computed in full.
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20, 1);
      Rho_Pi (a12, 3);
      Rho_Pi (a21, 6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44, 2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23, 8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, reduced: only a subset of lanes is updated in Theta, the
    // Rho/Pi chain stops after a02, Chi is applied to a02 and a03 only and
    // no round constant is added, since only a02 and a03 feed r0..r3.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20, 1);
    Rho_Pi (a12, 3);
    Rho_Pi (a21, 6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44, 2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23, 8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared.
    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * m17300_s08 - kernel entry point with an empty body.
 *
 * Launching this kernel performs no work: none of the arguments is read
 * or written.
 *
 * NOTE(review): presumably a placeholder so that the host can resolve the
 * symbol for this password-length class - confirm against the host code.
 */
__kernel void m17300_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
/**
 * m17300_s16 - kernel entry point with an empty body.
 *
 * Launching this kernel performs no work: none of the arguments is read
 * or written.
 *
 * NOTE(review): presumably a placeholder so that the host can resolve the
 * symbol for this password-length class - confirm against the host code.
 */
__kernel void m17300_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
695
OpenCL/m17300_a3-optimized.cl
Normal file
695
OpenCL/m17300_a3-optimized.cl
Normal file
@ -0,0 +1,695 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak round constants, one 64-bit value per round.
 *
 * The functions in this file XOR entry [round] into lane a00 as the Iota
 * step at the end of each fully computed round.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
  0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
  0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
  0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
  0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
  0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
  0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
  0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
|
||||
|
||||
/**
 * Number of Keccak rounds. Defaults to 24 unless KECCAK_ROUNDS is already
 * defined when this point is reached.
 *
 * The round loops in this file index keccakf_rndc with values up to
 * KECCAK_ROUNDS - 2, and the table holds 24 entries, so an override must
 * not exceed 25.
 */
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17300m - Keccak worker, multi-digest comparison (COMPARE_M_SIMD).
 *
 * w0..w3 hold a 64-byte base message prepared by the caller. For every
 * entry of bfs_buf (il_cnt entries, stepped VECT_SIZE at a time) the value
 * returned by ix_create_bft is ORed into w0[0], the Keccak rounds are run
 * over a 25-lane state seeded with the resulting message, and four 32-bit
 * words of the result are passed to COMPARE_M_SIMD.
 *
 * pw_len is accepted but not referenced by the statements in this body.
 * The remaining buffer and scalar arguments are not touched directly here;
 * presumably COMPARE_M_SIMD (defined elsewhere) consumes some of them by
 * name - confirm against the macro definition.
 *
 * NOTE(review): no padding byte is appended in this function; presumably
 * the caller or the host has already placed it in w0..w3 - confirm.
 */
DECLSPEC void m17300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // Neither id is referenced by the statements below; both are kept because
  // the comparison macro is defined elsewhere and may rely on these names.
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * loop
   */

  // Caller-provided part of the first message word; the per-iteration part
  // is ORed in below.
  u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    /**
     * Keccak
     */

    // The first eight lanes take the 64-byte message, the rest start at
    // zero except a32, which has only its top bit set (presumably the
    // closing padding bit of the rate block - confirm).
    u64x a00 = hl32_to_64 (w0[1], w0lr);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One link of the Rho/Pi chain: the lane "ad" receives the rotated
    // value carried in t, and its previous content becomes the new carry.
    // Uses bc0 and t from the enclosing scope. Wrapped in do/while so the
    // three statements always expand as a single statement.
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    // All rounds but the last are computed in full.
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20, 1);
      Rho_Pi (a12, 3);
      Rho_Pi (a21, 6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44, 2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23, 8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, reduced: only a subset of lanes is updated in Theta, the
    // Rho/Pi chain stops after a02, Chi is applied to a02 and a03 only and
    // no round constant is added, since only a02 and a03 feed r0..r3.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20, 1);
    Rho_Pi (a12, 3);
    Rho_Pi (a21, 6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44, 2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23, 8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared.
    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * m17300s - Keccak worker, single-digest comparison (COMPARE_S_SIMD).
 *
 * w0..w3 hold a 64-byte base message prepared by the caller. The four
 * words DGST_R0..DGST_R3 of digests_buf[digests_offset] are copied into
 * the local array "search". For every entry of bfs_buf (il_cnt entries,
 * stepped VECT_SIZE at a time) the value returned by ix_create_bft is ORed
 * into w0[0], the Keccak rounds are run over a 25-lane state seeded with
 * the resulting message, and four 32-bit words of the result are passed to
 * COMPARE_S_SIMD.
 *
 * pw_len is accepted but not referenced by the statements in this body.
 *
 * NOTE(review): no padding byte is appended in this function; presumably
 * the caller or the host has already placed it in w0..w3 - confirm.
 */
DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // Neither id is referenced by the statements below; both are kept because
  // the comparison macro is defined elsewhere and may rely on these names.
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * digest
   */

  // The name "search" is not used directly below; it is presumably picked
  // up by COMPARE_S_SIMD - confirm against the macro definition.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  // Caller-provided part of the first message word; the per-iteration part
  // is ORed in below.
  u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    /**
     * Keccak
     */

    // The first eight lanes take the 64-byte message, the rest start at
    // zero except a32, which has only its top bit set (presumably the
    // closing padding bit of the rate block - confirm).
    u64x a00 = hl32_to_64 (w0[1], w0lr);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One link of the Rho/Pi chain: the lane "ad" receives the rotated
    // value carried in t, and its previous content becomes the new carry.
    // Uses bc0 and t from the enclosing scope. Wrapped in do/while so the
    // three statements always expand as a single statement.
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    // All rounds but the last are computed in full.
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20, 1);
      Rho_Pi (a12, 3);
      Rho_Pi (a21, 6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44, 2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23, 8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, reduced: only a subset of lanes is updated in Theta, the
    // Rho/Pi chain stops after a02, Chi is applied to a02 and a03 only and
    // no round constant is added, since only a02 and a03 feed r0..r3.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20, 1);
    Rho_Pi (a12, 3);
    Rho_Pi (a21, 6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44, 2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23, 8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared.
    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * m17300_m04 - kernel entry point that prepares the message words for one
 * work-item and delegates to m17300m.
 *
 * The first four 32-bit words of pws[gid].i are copied into w0; w1, w2 and
 * w3 are zero-filled. The candidate length is read from pws[gid].pw_len.
 * All kernel arguments except combs_mode and gid_max are then forwarded to
 * m17300m unchanged.
 */
__kernel void m17300_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // Work-items beyond the number of loaded candidates have nothing to do.
  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];

  // Only w0 carries candidate data in this variant; the remaining message
  // words are cleared.
  u32 w1[4];

  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;

  u32 w2[4];

  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;

  u32 w3[4];

  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Entry point m17300_m08.
 *
 * Copies words 0..7 of this work-item's password buffer into w0 and w1,
 * leaves w2 and w3 zeroed, and passes the four blocks plus pw_len and the
 * kernel arguments on to m17300m(). combs_mode and gid_max are not
 * forwarded; gid_max is only used for the bounds check below.
 */
__kernel void m17300_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Entry point m17300_m16.
 *
 * Copies words 0..13 of this work-item's password buffer into w0..w3 (the
 * last two words of w3 stay zero) and passes the four blocks plus pw_len and
 * the kernel arguments on to m17300m(). combs_mode and gid_max are not
 * forwarded; gid_max is only used for the bounds check below.
 */
__kernel void m17300_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Entry point m17300_s04.
 *
 * Copies words 0..3 of this work-item's password buffer into w0, leaves
 * w1..w3 zeroed, and passes the four blocks plus pw_len and the kernel
 * arguments on to m17300s(). combs_mode and gid_max are not forwarded;
 * gid_max is only used for the bounds check below.
 */
__kernel void m17300_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { 0 };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Entry point m17300_s08.
 *
 * Copies words 0..7 of this work-item's password buffer into w0 and w1,
 * leaves w2 and w3 zeroed, and passes the four blocks plus pw_len and the
 * kernel arguments on to m17300s(). combs_mode and gid_max are not
 * forwarded; gid_max is only used for the bounds check below.
 */
__kernel void m17300_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0 };
  u32 w3[4] = { 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Entry point m17300_s16.
 *
 * Copies words 0..13 of this work-item's password buffer into w0..w3 (the
 * last two words of w3 stay zero) and passes the four blocks plus pw_len and
 * the kernel arguments on to m17300s(). combs_mode and gid_max are not
 * forwarded; gid_max is only used for the bounds check below.
 */
__kernel void m17300_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
479
OpenCL/m17400_a0-optimized.cl
Normal file
479
OpenCL/m17400_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants XORed into lane a00 at the end of each round (the
 * "Iota" step in the kernels below), indexed by round number.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, //  0
  0x0000000000008082, //  1
  0x800000000000808a, //  2
  0x8000000080008000, //  3
  0x000000000000808b, //  4
  0x0000000080000001, //  5
  0x8000000080008081, //  6
  0x8000000000008009, //  7
  0x000000000000008a, //  8
  0x0000000000000088, //  9
  0x0000000080008009, // 10
  0x000000008000000a, // 11
  0x000000008000808b, // 12
  0x800000000000008b, // 13
  0x8000000000008089, // 14
  0x8000000000008003, // 15
  0x8000000000008002, // 16
  0x8000000000000080, // 17
  0x000000000000800a, // 18
  0x800000008000000a, // 19
  0x8000000080008081, // 20
  0x8000000000008080, // 21
  0x0000000080000001, // 22
  0x8000000080008008  // 23
};

// Number of rounds; may be overridden from the build command line.
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * Entry point m17400_m04.
 *
 * For each rule position il_pos, applies the rule to the base password
 * (first eight words of pws[gid]), appends a 0x06 marker at the resulting
 * length, runs the Keccak permutation over a 25-lane state and passes the
 * four 32-bit words taken from lanes a03/a02 to COMPARE_M_SIMD.
 *
 * Only w0/w1 (32 bytes) are absorbed into the state; w2/w3 are declared but
 * not used by this kernel.
 */
__kernel void m17400_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x06_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // Row 0 carries the message words; every other lane starts at zero except
    // a31, whose top bit is set.
    // NOTE(review): presumably the closing pad bit of a 136-byte rate - confirm.
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0, a31 = 0x8000000000000000, a32 = 0, a33 = 0, a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    // One step of the lane-rotation chain: save the lane, overwrite it with
    // the rotated carry, then carry the saved value forward. Uses c0 as
    // scratch and tmp as the carry from the enclosing scope.
    #define Rho_Pi(ad,r)    \
      c0 = ad;              \
      ad = rotl64 (tmp, r); \
      tmp = c0;

    // All rounds but the last are computed in full.
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x tmp;

      tmp = c4 ^ rotl64 (c1, 1);
      a00 ^= tmp; a10 ^= tmp; a20 ^= tmp; a30 ^= tmp; a40 ^= tmp;

      tmp = c0 ^ rotl64 (c2, 1);
      a01 ^= tmp; a11 ^= tmp; a21 ^= tmp; a31 ^= tmp; a41 ^= tmp;

      tmp = c1 ^ rotl64 (c3, 1);
      a02 ^= tmp; a12 ^= tmp; a22 ^= tmp; a32 ^= tmp; a42 ^= tmp;

      tmp = c2 ^ rotl64 (c4, 1);
      a03 ^= tmp; a13 ^= tmp; a23 ^= tmp; a33 ^= tmp; a43 ^= tmp;

      tmp = c3 ^ rotl64 (c0, 1);
      a04 ^= tmp; a14 ^= tmp; a24 ^= tmp; a34 ^= tmp; a44 ^= tmp;

      // Rho Pi

      tmp = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi, one row at a time

      c0 = a00; c1 = a01; c2 = a02; c3 = a03; c4 = a04;
      a00 ^= ~c1 & c2;
      a01 ^= ~c2 & c3;
      a02 ^= ~c3 & c4;
      a03 ^= ~c4 & c0;
      a04 ^= ~c0 & c1;

      c0 = a10; c1 = a11; c2 = a12; c3 = a13; c4 = a14;
      a10 ^= ~c1 & c2;
      a11 ^= ~c2 & c3;
      a12 ^= ~c3 & c4;
      a13 ^= ~c4 & c0;
      a14 ^= ~c0 & c1;

      c0 = a20; c1 = a21; c2 = a22; c3 = a23; c4 = a24;
      a20 ^= ~c1 & c2;
      a21 ^= ~c2 & c3;
      a22 ^= ~c3 & c4;
      a23 ^= ~c4 & c0;
      a24 ^= ~c0 & c1;

      c0 = a30; c1 = a31; c2 = a32; c3 = a33; c4 = a34;
      a30 ^= ~c1 & c2;
      a31 ^= ~c2 & c3;
      a32 ^= ~c3 & c4;
      a33 ^= ~c4 & c0;
      a34 ^= ~c0 & c1;

      c0 = a40; c1 = a41; c2 = a42; c3 = a43; c4 = a44;
      a40 ^= ~c1 & c2;
      a41 ^= ~c2 & c3;
      a42 ^= ~c3 & c4;
      a43 ^= ~c4 & c0;
      a44 ^= ~c0 & c1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, truncated: only a02 and a03 are compared below, so Theta
    // leaves out a40, a01, a11, a42, a14 and a24, the rotation chain stops
    // once a02 is written, Chi is evaluated for a02/a03 only, and Iota
    // (which touches a00 alone) is dropped.

    // Theta

    u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x tmp;

    tmp = c4 ^ rotl64 (c1, 1);
    a00 ^= tmp; a10 ^= tmp; a20 ^= tmp; a30 ^= tmp;

    tmp = c0 ^ rotl64 (c2, 1);
    a21 ^= tmp; a31 ^= tmp; a41 ^= tmp;

    tmp = c1 ^ rotl64 (c3, 1);
    a02 ^= tmp; a12 ^= tmp; a22 ^= tmp; a32 ^= tmp;

    tmp = c2 ^ rotl64 (c4, 1);
    a03 ^= tmp; a13 ^= tmp; a23 ^= tmp; a33 ^= tmp; a43 ^= tmp;

    tmp = c3 ^ rotl64 (c0, 1);
    a04 ^= tmp; a34 ^= tmp; a44 ^= tmp;

    // Rho Pi

    tmp = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    c0 = a00;
    c2 = a02;
    c3 = a03;
    c4 = a04;

    a02 ^= ~c3 & c4;
    a03 ^= ~c4 & c0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Entry point m17400_m08: the body is empty, so launching this kernel does no work.
 * NOTE(review): presumably kept only so the _m08 entry point exists alongside _m04 - confirm against the host-side kernel lookup.
 */
__kernel void m17400_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Entry point m17400_m16: the body is empty, so launching this kernel does no work.
 * NOTE(review): presumably kept only so the _m16 entry point exists alongside _m04 - confirm against the host-side kernel lookup.
 */
__kernel void m17400_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Entry point m17400_s04.
 *
 * Same computation as the multi-hash kernel: for each rule position il_pos,
 * applies the rule to the base password (first eight words of pws[gid]),
 * appends a 0x06 marker at the resulting length and runs the Keccak
 * permutation over a 25-lane state. The four 32-bit words taken from lanes
 * a03/a02 are passed to COMPARE_S_SIMD; the digest words at digests_offset
 * are loaded into search[] beforehand.
 *
 * Only w0/w1 (32 bytes) are absorbed into the state; w2/w3 are declared but
 * not used by this kernel.
 */
__kernel void m17400_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items past the end of the password batch do nothing
  if (gid >= gid_max) return;

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * digest
   */

  // NOTE(review): search is not referenced by name in this kernel; presumably
  // COMPARE_S_SIMD picks it up - confirm against the macro definition.
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x06_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // Row 0 carries the message words; every other lane starts at zero except
    // a31, whose top bit is set.
    // NOTE(review): presumably the closing pad bit of a 136-byte rate - confirm.
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0, a31 = 0x8000000000000000, a32 = 0, a33 = 0, a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    // One step of the lane-rotation chain: save the lane, overwrite it with
    // the rotated carry, then carry the saved value forward. Uses c0 as
    // scratch and tmp as the carry from the enclosing scope.
    #define Rho_Pi(ad,r)    \
      c0 = ad;              \
      ad = rotl64 (tmp, r); \
      tmp = c0;

    // All rounds but the last are computed in full.
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x tmp;

      tmp = c4 ^ rotl64 (c1, 1);
      a00 ^= tmp; a10 ^= tmp; a20 ^= tmp; a30 ^= tmp; a40 ^= tmp;

      tmp = c0 ^ rotl64 (c2, 1);
      a01 ^= tmp; a11 ^= tmp; a21 ^= tmp; a31 ^= tmp; a41 ^= tmp;

      tmp = c1 ^ rotl64 (c3, 1);
      a02 ^= tmp; a12 ^= tmp; a22 ^= tmp; a32 ^= tmp; a42 ^= tmp;

      tmp = c2 ^ rotl64 (c4, 1);
      a03 ^= tmp; a13 ^= tmp; a23 ^= tmp; a33 ^= tmp; a43 ^= tmp;

      tmp = c3 ^ rotl64 (c0, 1);
      a04 ^= tmp; a14 ^= tmp; a24 ^= tmp; a34 ^= tmp; a44 ^= tmp;

      // Rho Pi

      tmp = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi, one row at a time

      c0 = a00; c1 = a01; c2 = a02; c3 = a03; c4 = a04;
      a00 ^= ~c1 & c2;
      a01 ^= ~c2 & c3;
      a02 ^= ~c3 & c4;
      a03 ^= ~c4 & c0;
      a04 ^= ~c0 & c1;

      c0 = a10; c1 = a11; c2 = a12; c3 = a13; c4 = a14;
      a10 ^= ~c1 & c2;
      a11 ^= ~c2 & c3;
      a12 ^= ~c3 & c4;
      a13 ^= ~c4 & c0;
      a14 ^= ~c0 & c1;

      c0 = a20; c1 = a21; c2 = a22; c3 = a23; c4 = a24;
      a20 ^= ~c1 & c2;
      a21 ^= ~c2 & c3;
      a22 ^= ~c3 & c4;
      a23 ^= ~c4 & c0;
      a24 ^= ~c0 & c1;

      c0 = a30; c1 = a31; c2 = a32; c3 = a33; c4 = a34;
      a30 ^= ~c1 & c2;
      a31 ^= ~c2 & c3;
      a32 ^= ~c3 & c4;
      a33 ^= ~c4 & c0;
      a34 ^= ~c0 & c1;

      c0 = a40; c1 = a41; c2 = a42; c3 = a43; c4 = a44;
      a40 ^= ~c1 & c2;
      a41 ^= ~c2 & c3;
      a42 ^= ~c3 & c4;
      a43 ^= ~c4 & c0;
      a44 ^= ~c0 & c1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, truncated: only a02 and a03 are compared below, so Theta
    // leaves out a40, a01, a11, a42, a14 and a24, the rotation chain stops
    // once a02 is written, Chi is evaluated for a02/a03 only, and Iota
    // (which touches a00 alone) is dropped.

    // Theta

    u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x tmp;

    tmp = c4 ^ rotl64 (c1, 1);
    a00 ^= tmp; a10 ^= tmp; a20 ^= tmp; a30 ^= tmp;

    tmp = c0 ^ rotl64 (c2, 1);
    a21 ^= tmp; a31 ^= tmp; a41 ^= tmp;

    tmp = c1 ^ rotl64 (c3, 1);
    a02 ^= tmp; a12 ^= tmp; a22 ^= tmp; a32 ^= tmp;

    tmp = c2 ^ rotl64 (c4, 1);
    a03 ^= tmp; a13 ^= tmp; a23 ^= tmp; a33 ^= tmp; a43 ^= tmp;

    tmp = c3 ^ rotl64 (c0, 1);
    a04 ^= tmp; a34 ^= tmp; a44 ^= tmp;

    // Rho Pi

    tmp = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    c0 = a00;
    c2 = a02;
    c3 = a03;
    c4 = a04;

    a02 ^= ~c3 & c4;
    a03 ^= ~c4 & c0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Entry point m17400_s08: the body is empty, so launching this kernel does no work.
 * NOTE(review): presumably kept only so the _s08 entry point exists alongside _s04 - confirm against the host-side kernel lookup.
 */
__kernel void m17400_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Entry point m17400_s16: the body is empty, so launching this kernel does no work.
 * NOTE(review): presumably kept only so the _s16 entry point exists alongside _s04 - confirm against the host-side kernel lookup.
 */
__kernel void m17400_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
@ -28,7 +28,7 @@ __constant u64a keccakf_rndc[24] =
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
__kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -297,15 +297,15 @@ __kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
}
|
||||
|
||||
|
||||
__kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
/**
 * m17400_m08: kernel entry point whose body is empty; it performs no work.
 */
__kernel void m17400_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
__kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
/**
 * m17400_m16: kernel entry point whose body is empty; it performs no work.
 */
__kernel void m17400_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
__kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17400_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -585,10 +585,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
/**
 * m17400_s08: kernel entry point whose body is empty; it performs no work.
 */
__kernel void m17400_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
__kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
/**
 * m17400_s16: kernel entry point whose body is empty; it performs no work.
 */
__kernel void m17400_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
696
OpenCL/m17400_a3-optimized.cl
Normal file
696
OpenCL/m17400_a3-optimized.cl
Normal file
@ -0,0 +1,696 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round 64-bit constants. The round loops in this file XOR
 * keccakf_rndc[round] into lane a00 during their Iota step.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // [ 0]
  0x0000000000008082, // [ 1]
  0x800000000000808a, // [ 2]
  0x8000000080008000, // [ 3]
  0x000000000000808b, // [ 4]
  0x0000000080000001, // [ 5]
  0x8000000080008081, // [ 6]
  0x8000000000008009, // [ 7]
  0x000000000000008a, // [ 8]
  0x0000000000000088, // [ 9]
  0x0000000080008009, // [10]
  0x000000008000000a, // [11]
  0x000000008000808b, // [12]
  0x800000000000008b, // [13]
  0x8000000000008089, // [14]
  0x8000000000008003, // [15]
  0x8000000000008002, // [16]
  0x8000000000000080, // [17]
  0x000000000000800a, // [18]
  0x800000008000000a, // [19]
  0x8000000080008081, // [20]
  0x8000000000008080, // [21]
  0x0000000080000001, // [22]
  0x8000000080008008  // [23]
};

// Default round count; a KECCAK_ROUNDS definition made before this point wins.
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17400m: inner loop that hashes candidates and ends in COMPARE_M_SIMD.
 *
 * For every il_pos in [0, il_cnt), stepping by VECT_SIZE, the first candidate
 * word is formed as w0[0] | ix_create_bft (bfs_buf, il_pos). w0..w3 are then
 * packed pairwise into the eight 64-bit lanes a00..a04, a10..a12; every other
 * lane starts at zero except a31, which starts at 0x8000000000000000.
 *
 * The first KECCAK_ROUNDS - 1 rounds (Theta, Rho Pi, Chi, Iota) run in full
 * inside the round loop. The last round is written out by hand after the loop
 * and only produces the Chi outputs of a02 and a03: the Theta and Rho Pi
 * updates that are left out there do not feed those two lanes, and no Iota
 * step is applied.
 *
 * r0/r1 are the low/high halves of a03 and r2/r3 the low/high halves of a02;
 * all four are handed to COMPARE_M_SIMD.
 *
 * pw_len is not referenced by the code visible in this function.
 * NOTE(review): any length-dependent padding is presumably already present in
 * w0..w3 when this is called - confirm against the caller/host side.
 */
DECLSPEC void m17400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
u32 w0l = w0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0x8000000000000000;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
|
||||
// Rho_Pi (ad, r): saves ad in bc0, stores rotl64 (t, r) into ad, then leaves the saved value in t for the next link of the chain. Relies on bc0 and t being in scope at the point of use.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, written out by hand: only the values needed for the Chi outputs of a02 and a03 below are kept up to date.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * m17400s: inner loop that hashes candidates and ends in COMPARE_S_SIMD.
 *
 * Before the loop, search[] is filled from
 * digests_buf[digests_offset].digest_buf at indices DGST_R0..DGST_R3.
 * NOTE(review): search is not referenced by name in the visible code; it is
 * presumably consumed inside COMPARE_S_SIMD - confirm against the macro.
 *
 * For every il_pos in [0, il_cnt), stepping by VECT_SIZE, the first candidate
 * word is formed as w0[0] | ix_create_bft (bfs_buf, il_pos). w0..w3 are then
 * packed pairwise into the eight 64-bit lanes a00..a04, a10..a12; every other
 * lane starts at zero except a31, which starts at 0x8000000000000000.
 *
 * The first KECCAK_ROUNDS - 1 rounds (Theta, Rho Pi, Chi, Iota) run in full
 * inside the round loop. The last round is written out by hand after the loop
 * and only produces the Chi outputs of a02 and a03: the Theta and Rho Pi
 * updates that are left out there do not feed those two lanes, and no Iota
 * step is applied.
 *
 * r0/r1 are the low/high halves of a03 and r2/r3 the low/high halves of a02;
 * all four are handed to COMPARE_S_SIMD.
 *
 * pw_len is not referenced by the code visible in this function.
 */
DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
u32 w0l = w0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0x8000000000000000;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): saves ad in bc0, stores rotl64 (t, r) into ad, then leaves the saved value in t for the next link of the chain. Relies on bc0 and t being in scope at the point of use.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, written out by hand: only the values needed for the Chi outputs of a02 and a03 below are kept up to date.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * m17400_m04: kernel entry point that hands the first four 32-bit words of
 * the candidate stored at pws[gid] to m17400m.
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 * w0 is loaded from pws[gid].i[0..3]; w1, w2 and w3 are passed as all-zero.
 * pw_len is read from pws[gid].pw_len. Every kernel argument except
 * combs_mode and gid_max is forwarded unchanged.
 */
__kernel void m17400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max)
  {
    return;
  }

  // Only the first word block is read from the candidate; the rest stay zero.
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17400m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf,
           d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * m17400_m08: kernel entry point that hands the first eight 32-bit words of
 * the candidate stored at pws[gid] to m17400m.
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 * w0 is loaded from pws[gid].i[0..3] and w1 from pws[gid].i[4..7]; w2 and w3
 * are passed as all-zero. pw_len is read from pws[gid].pw_len. Every kernel
 * argument except combs_mode and gid_max is forwarded unchanged.
 */
__kernel void m17400_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max)
  {
    return;
  }

  // The first two word blocks are read from the candidate; the rest stay zero.
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17400m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf,
           d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * m17400_m16: kernel entry point that hands the first fourteen 32-bit words
 * of the candidate stored at pws[gid] to m17400m.
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 * w0, w1 and w2 are loaded from pws[gid].i[0..11]; w3[0] and w3[1] come from
 * pws[gid].i[12] and pws[gid].i[13], while w3[2] and w3[3] are passed as zero.
 * pw_len is read from pws[gid].pw_len. Every kernel argument except
 * combs_mode and gid_max is forwarded unchanged.
 */
__kernel void m17400_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max)
  {
    return;
  }

  // Words 0..13 are read from the candidate; the last two words of w3 stay zero.
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17400m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf,
           d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m17400_s04.
 *
 * Copies words 0..3 of this work-item's pws[] entry into w0, leaves
 * w1..w3 zeroed, reads pw_len and forwards everything to m17400s ().
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 */
__kernel void m17400_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // out-of-range work-items have no candidate to process
  if (gid >= gid_max) return;

  // only the first four candidate words are loaded; the rest is zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  // hand over to the shared single-hash implementation
  m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m17400_s08.
 *
 * Copies words 0..7 of this work-item's pws[] entry into w0 and w1,
 * leaves w2 and w3 zeroed, reads pw_len and forwards everything to
 * m17400s ().
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 */
__kernel void m17400_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // out-of-range work-items have no candidate to process
  if (gid >= gid_max) return;

  // the first eight candidate words are loaded; the rest is zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  // hand over to the shared single-hash implementation
  m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m17400_s16.
 *
 * Copies words 0..13 of this work-item's pws[] entry into four private
 * 4-word buffers (the last two words of w3 are zeroed), reads pw_len and
 * forwards everything to m17400s ().
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 */
__kernel void m17400_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // out-of-range work-items have no candidate to process
  if (gid >= gid_max) return;

  // candidate words 0..13; the tail of w3 stays zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  // hand over to the shared single-hash implementation
  m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
479
OpenCL/m17500_a0-optimized.cl
Normal file
479
OpenCL/m17500_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants XORed into lane a00 in the Iota step,
 * indexed by round number.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, //  0
  0x0000000000008082, //  1
  0x800000000000808a, //  2
  0x8000000080008000, //  3
  0x000000000000808b, //  4
  0x0000000080000001, //  5
  0x8000000080008081, //  6
  0x8000000000008009, //  7
  0x000000000000008a, //  8
  0x0000000000000088, //  9
  0x0000000080008009, // 10
  0x000000008000000a, // 11
  0x000000008000808b, // 12
  0x800000000000008b, // 13
  0x8000000000008089, // 14
  0x8000000000008003, // 15
  0x8000000000008002, // 16
  0x8000000000000080, // 17
  0x000000000000800a, // 18
  0x800000008000000a, // 19
  0x8000000080008081, // 20
  0x8000000000008080, // 21
  0x0000000080000001, // 22
  0x8000000080008008  // 23
};

// number of permutation rounds; may be predefined by the build
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * Kernel entry point m17500_m04 (rule-based attack, multi-hash compare).
 *
 * For every rule slot il_pos the base word (words 0..7 of pws[gid].i) is
 * mutated by apply_rules_vect (), suffixed via append_0x06_2x4_VV () and
 * loaded into the first four lanes of a 5x5 lane state. The state is run
 * through KECCAK_ROUNDS - 1 full rounds plus one reduced final round, and
 * lanes a03 / a02 are handed to COMPARE_M_SIMD ().
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 */
__kernel void m17500_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // base word: the first eight 32-bit words of this work-item's candidate
  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * loop over rule slots
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x06_2x4_VV (w0, w1, out_len);

    /**
     * Keccak state: the message occupies lanes a00..a03
     */

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0;
    u64x a11 = 0;
    u64x a12 = 0;
    u64x a13 = 0;
    u64x a14 = 0;

    u64x a20 = 0;
    u64x a21 = 0;
    // NOTE(review): presumably the closing padding bit of the block - confirm against the hash-mode definition
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0;
    u64x a24 = 0;

    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;

    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one link of the Rho/Pi chain: the carried lane is rotated into `ad`
    // and the previous content of `ad` becomes the new carried lane
    #define Rho_Pi(ad,r)          \
    {                             \
      const u64x displaced = ad;  \
      ad = rotl64 (lane, r);      \
      lane = displaced;           \
    }

    // Chi on one row of five lanes, computed from a snapshot of the row
    #define Chi_Row(x0,x1,x2,x3,x4) \
    {                               \
      const u64x y0 = x0;           \
      const u64x y1 = x1;           \
      const u64x y2 = x2;           \
      const u64x y3 = x3;           \
      const u64x y4 = x4;           \
      x0 ^= ~y1 & y2;               \
      x1 ^= ~y2 & y3;               \
      x2 ^= ~y3 & y4;               \
      x3 ^= ~y4 & y0;               \
      x4 ^= ~y0 & y1;               \
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta: column parities ...

      const u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      const u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      const u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      const u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      const u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      // ... combined into one delta per column ...

      const u64x d0 = c4 ^ rotl64 (c1, 1);
      const u64x d1 = c0 ^ rotl64 (c2, 1);
      const u64x d2 = c1 ^ rotl64 (c3, 1);
      const u64x d3 = c2 ^ rotl64 (c4, 1);
      const u64x d4 = c3 ^ rotl64 (c0, 1);

      // ... and folded into every lane of that column

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi: the chain starts with lane a01 and visits 24 lanes in a fixed order

      u64x lane = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    #undef Chi_Row

    /**
     * Final round, reduced: only a02 and a03 are compared below, so Theta
     * skips some lanes, the Rho/Pi chain stops once a02 has been written,
     * Chi is evaluated for a02/a03 only and Iota (which touches only a00)
     * is left out.
     */

    // Theta

    const u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    const u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    const u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    const u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    const u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = c4 ^ rotl64 (c1, 1);
    const u64x d1 = c0 ^ rotl64 (c2, 1);
    const u64x d2 = c1 ^ rotl64 (c3, 1);
    const u64x d3 = c2 ^ rotl64 (c4, 1);
    const u64x d4 = c3 ^ rotl64 (c0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
                          a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4;                       a34 ^= d4; a44 ^= d4;

    // Rho Pi

    u64x lane = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi for a02 and a03, from a snapshot of the lanes they depend on

    const u64x row0 = a00;
    const u64x row3 = a03;
    const u64x row4 = a04;

    a02 ^= ~row3 & row4;
    a03 ^= ~row4 & row0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point m17500_m08: empty stub, performs no work.
 */
__kernel void m17500_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  // intentionally left empty
}
|
||||
|
||||
/**
 * Kernel entry point m17500_m16: empty stub, performs no work.
 */
__kernel void m17500_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  // intentionally left empty
}
|
||||
|
||||
/**
 * Kernel entry point m17500_s04 (rule-based attack, single-hash compare).
 *
 * For every rule slot il_pos the base word (words 0..7 of pws[gid].i) is
 * mutated by apply_rules_vect (), suffixed via append_0x06_2x4_VV () and
 * loaded into the first four lanes of a 5x5 lane state. The state is run
 * through KECCAK_ROUNDS - 1 full rounds plus one reduced final round, and
 * lanes a03 / a02 are handed to COMPARE_S_SIMD (), with the target digest
 * words held in the local `search` array.
 *
 * Work-items whose global id is >= gid_max return without doing anything.
 */
__kernel void m17500_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // base word: the first eight 32-bit words of this work-item's candidate
  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * digest: the four words of the target hash at digests_offset
   */

  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop over rule slots
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x06_2x4_VV (w0, w1, out_len);

    /**
     * Keccak state: the message occupies lanes a00..a03
     */

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0;
    u64x a11 = 0;
    u64x a12 = 0;
    u64x a13 = 0;
    u64x a14 = 0;

    u64x a20 = 0;
    u64x a21 = 0;
    // NOTE(review): presumably the closing padding bit of the block - confirm against the hash-mode definition
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0;
    u64x a24 = 0;

    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;

    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one link of the Rho/Pi chain: the carried lane is rotated into `ad`
    // and the previous content of `ad` becomes the new carried lane
    #define Rho_Pi(ad,r)          \
    {                             \
      const u64x displaced = ad;  \
      ad = rotl64 (lane, r);      \
      lane = displaced;           \
    }

    // Chi on one row of five lanes, computed from a snapshot of the row
    #define Chi_Row(x0,x1,x2,x3,x4) \
    {                               \
      const u64x y0 = x0;           \
      const u64x y1 = x1;           \
      const u64x y2 = x2;           \
      const u64x y3 = x3;           \
      const u64x y4 = x4;           \
      x0 ^= ~y1 & y2;               \
      x1 ^= ~y2 & y3;               \
      x2 ^= ~y3 & y4;               \
      x3 ^= ~y4 & y0;               \
      x4 ^= ~y0 & y1;               \
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta: column parities ...

      const u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      const u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      const u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      const u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      const u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      // ... combined into one delta per column ...

      const u64x d0 = c4 ^ rotl64 (c1, 1);
      const u64x d1 = c0 ^ rotl64 (c2, 1);
      const u64x d2 = c1 ^ rotl64 (c3, 1);
      const u64x d3 = c2 ^ rotl64 (c4, 1);
      const u64x d4 = c3 ^ rotl64 (c0, 1);

      // ... and folded into every lane of that column

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi: the chain starts with lane a01 and visits 24 lanes in a fixed order

      u64x lane = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    #undef Chi_Row

    /**
     * Final round, reduced: only a02 and a03 are compared below, so Theta
     * skips some lanes, the Rho/Pi chain stops once a02 has been written,
     * Chi is evaluated for a02/a03 only and Iota (which touches only a00)
     * is left out.
     */

    // Theta

    const u64x c0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    const u64x c1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    const u64x c2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    const u64x c3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    const u64x c4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = c4 ^ rotl64 (c1, 1);
    const u64x d1 = c0 ^ rotl64 (c2, 1);
    const u64x d2 = c1 ^ rotl64 (c3, 1);
    const u64x d3 = c2 ^ rotl64 (c4, 1);
    const u64x d4 = c3 ^ rotl64 (c0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
                          a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4;                       a34 ^= d4; a44 ^= d4;

    // Rho Pi

    u64x lane = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi for a02 and a03, from a snapshot of the lanes they depend on

    const u64x row0 = a00;
    const u64x row3 = a03;
    const u64x row4 = a04;

    a02 ^= ~row3 & row4;
    a03 ^= ~row4 & row0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point m17500_s08: empty stub, performs no work.
 */
__kernel void m17500_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  // intentionally left empty
}
|
||||
|
||||
/**
 * Kernel entry point m17500_s16: empty stub, performs no work.
 */
__kernel void m17500_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  // intentionally left empty
}
|
594
OpenCL/m17500_a1-optimized.cl
Normal file
594
OpenCL/m17500_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Table of 24 64-bit Keccak round constants, one entry per round index.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, //  0
  0x0000000000008082, //  1
  0x800000000000808a, //  2
  0x8000000080008000, //  3
  0x000000000000808b, //  4
  0x0000000080000001, //  5
  0x8000000080008081, //  6
  0x8000000000008009, //  7
  0x000000000000008a, //  8
  0x0000000000000088, //  9
  0x0000000080008009, // 10
  0x000000008000000a, // 11
  0x000000008000808b, // 12
  0x800000000000008b, // 13
  0x8000000000008089, // 14
  0x8000000000008003, // 15
  0x8000000000008002, // 16
  0x8000000000000080, // 17
  0x000000000000800a, // 18
  0x800000008000000a, // 19
  0x8000000080008081, // 20
  0x8000000000008080, // 21
  0x0000000080000001, // 22
  0x8000000080008008  // 23
};

// number of permutation rounds; may be predefined by the build
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17500_m04: for every right-hand candidate position il_pos in combs_buf,
 * merges it with this work-item's left-hand word from pws[gid], loads the
 * merged 16 u32 words into the first eight lanes of a 25-lane 64-bit state,
 * runs KECCAK_ROUNDS rounds (the last one only partially) and passes the
 * 32-bit halves of lanes a03 and a02 to COMPARE_M_SIMD.
 */
__kernel void m17500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
// work-items at or beyond gid_max have nothing to process
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
// left-hand word: the first eight u32 of this work-item's pws entry
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
// NOTE(review): pw_len is not referenced again by name in this function's visible text
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
// right-hand word: eight u32 fetched through ix_create_combt for position il_pos
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
// BASE_LEFT: the right word is passed to the helper with the left word's length;
// otherwise the left word is passed with the right word's length.
// NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer up by that
// many bytes so the two halves do not overlap when OR-ed below - confirm in inc_common.cl
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
// merge the two halves word by word
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
|
||||
*/
|
||||
|
||||
// state setup: each pair of merged words becomes one 64-bit lane (a00..a12);
// every other lane starts at zero except a22
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
// NOTE(review): top bit preset in the 13th lane - looks like the closing padding bit
// of the rate block for this hash mode; confirm against the mode's specification
u64x a22 = 0x8000000000000000;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi(ad,r): writes rotl64 (t, r) into lane ad and carries ad's previous value
// forward in t, using bc0 as scratch; the call order below therefore matters
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
// all rounds except the last run in full here; the last round follows the loop
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, reduced: Theta skips some lanes, the Rho Pi chain stops after a02,
// Chi is evaluated for a02 and a03 only, and there is no Iota step.
// Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
// Chi restricted to the two lanes that are read below
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// the four 32-bit halves of a03 and a02 are the values handed to the comparison macro
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// m17500_m08: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably kept so that the symbol exists for the host-side
// kernel lookup - confirm against the host code.
__kernel void m17500_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
// m17500_m16: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably kept so that the symbol exists for the host-side
// kernel lookup - confirm against the host code.
__kernel void m17500_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17500_s04: for every right-hand candidate position il_pos in combs_buf,
 * merges it with this work-item's left-hand word from pws[gid], loads the
 * merged 16 u32 words into the first eight lanes of a 25-lane 64-bit state,
 * runs KECCAK_ROUNDS rounds (the last one only partially) and passes the
 * 32-bit halves of lanes a03 and a02 to COMPARE_S_SIMD. Before the loop it
 * reads four target words from digests_buf[digests_offset] into search[].
 */
__kernel void m17500_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
// work-items at or beyond gid_max have nothing to process
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
// left-hand word: the first eight u32 of this work-item's pws entry
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
|
||||
*/
|
||||
|
||||
// four target words taken from digests_buf[digests_offset].
// NOTE(review): search is not referenced by name elsewhere in this function;
// presumably it is read by COMPARE_S_SIMD - verify in inc_simd.cl
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
// NOTE(review): pw_len is not referenced again by name in this function's visible text
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
// right-hand word: eight u32 fetched through ix_create_combt for position il_pos
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
// BASE_LEFT: the right word is passed to the helper with the left word's length;
// otherwise the left word is passed with the right word's length.
// NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the buffer up by that
// many bytes so the two halves do not overlap when OR-ed below - confirm in inc_common.cl
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
// merge the two halves word by word
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
|
||||
*/
|
||||
|
||||
// state setup: each pair of merged words becomes one 64-bit lane (a00..a12);
// every other lane starts at zero except a22
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
// NOTE(review): top bit preset in the 13th lane - looks like the closing padding bit
// of the rate block for this hash mode; confirm against the mode's specification
u64x a22 = 0x8000000000000000;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi(ad,r): writes rotl64 (t, r) into lane ad and carries ad's previous value
// forward in t, using bc0 as scratch; the call order below therefore matters
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
// all rounds except the last run in full here; the last round follows the loop
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, reduced: Theta skips some lanes, the Rho Pi chain stops after a02,
// Chi is evaluated for a02 and a03 only, and there is no Iota step.
// Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
// Chi restricted to the two lanes that are read below
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// the four 32-bit halves of a03 and a02 are the values handed to the comparison macro
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
// m17500_s08: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably kept so that the symbol exists for the host-side
// kernel lookup - confirm against the host code.
__kernel void m17500_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
// m17500_s16: kernel entry point with an empty body; it performs no work.
// NOTE(review): presumably kept so that the symbol exists for the host-side
// kernel lookup - confirm against the host code.
__kernel void m17500_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
695
OpenCL/m17500_a3-optimized.cl
Normal file
695
OpenCL/m17500_a3-optimized.cl
Normal file
@ -0,0 +1,695 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
// Per-round constants XOR-ed into lane a00 in the Iota step
// (a00 ^= keccakf_rndc[round]); 24 entries, one per round index.
// The values are the standard Keccak-f[1600] round constants.
__constant u64a keccakf_rndc[24] =
|
||||
{
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
// Number of Keccak rounds used by the functions below. Defaults to 24 unless
// KECCAK_ROUNDS was already defined before this point.
#ifndef KECCAK_ROUNDS
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
/**
 * m17500m: helper that iterates over il_cnt candidate positions. For each one
 * it ORs w0[0] with a value from ix_create_bft (bfs_buf, il_pos), loads that
 * word plus the remaining caller-supplied words w0[1]..w3[3] into the first
 * eight lanes of a 25-lane 64-bit state, runs KECCAK_ROUNDS rounds (the last
 * one only partially) and passes the 32-bit halves of lanes a03 and a02 to
 * COMPARE_M_SIMD. Only the first message word varies between iterations.
 */
DECLSPEC void m17500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
|
||||
*/
|
||||
|
||||
// NOTE(review): gid and lid are not referenced by name elsewhere in this function's
// visible text; presumably COMPARE_M_SIMD picks them up - verify in inc_simd.cl
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* loop
|
||||
|
||||
*/
|
||||
|
||||
// the caller-supplied first word stays fixed; the per-candidate part is OR-ed in below
u32 w0l = w0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
|
||||
*/
|
||||
|
||||
// state setup: each pair of message words becomes one 64-bit lane (a00..a12);
// every other lane starts at zero except a22
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
// NOTE(review): top bit preset in the 13th lane - looks like the closing padding bit
// of the rate block for this hash mode; confirm against the mode's specification
u64x a22 = 0x8000000000000000;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi(ad,r): writes rotl64 (t, r) into lane ad and carries ad's previous value
// forward in t, using bc0 as scratch; the call order below therefore matters
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
// all rounds except the last run in full here; the last round follows the loop
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, reduced: Theta skips some lanes, the Rho Pi chain stops after a02,
// Chi is evaluated for a02 and a03 only, and there is no Iota step.
// Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
// Chi restricted to the two lanes that are read below
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// the four 32-bit halves of a03 and a02 are the values handed to the comparison macro
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
u32 w0l = w0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0x8000000000000000;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500m.
 *
 * Only words i[0]..i[3] of the candidate are copied (into w0); w1, w2 and w3
 * are handed over zero-filled. Work-items whose id is at or beyond gid_max
 * return immediately. combs_mode and gid_max are not forwarded.
 */
__kernel void m17500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..3 come from the candidate, everything above is zero
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500m.
 *
 * Words i[0]..i[7] of the candidate are copied (into w0 and w1); w2 and w3
 * are handed over zero-filled. Work-items whose id is at or beyond gid_max
 * return immediately. combs_mode and gid_max are not forwarded.
 */
__kernel void m17500_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..7 come from the candidate, everything above is zero
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500m.
 *
 * Words i[0]..i[13] of the candidate are copied (w0, w1, w2 and the first
 * two words of w3); w3[2] and w3[3] are handed over as zero. Work-items
 * whose id is at or beyond gid_max return immediately. combs_mode and
 * gid_max are not forwarded.
 */
__kernel void m17500_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..13 come from the candidate, the last two words stay zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500m (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500s.
 *
 * Only words i[0]..i[3] of the candidate are copied (into w0); w1, w2 and w3
 * are handed over zero-filled. Work-items whose id is at or beyond gid_max
 * return immediately. combs_mode and gid_max are not forwarded.
 */
__kernel void m17500_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..3 come from the candidate, everything above is zero
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500s.
 *
 * Words i[0]..i[7] of the candidate are copied (into w0 and w1); w2 and w3
 * are handed over zero-filled. Work-items whose id is at or beyond gid_max
 * return immediately. combs_mode and gid_max are not forwarded.
 */
__kernel void m17500_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..7 come from the candidate, everything above is zero
  u32 w0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 w1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that stages the candidate stored in pws[gid] into four
 * private 4-word buffers and forwards them, together with the kernel
 * arguments, to m17500s.
 *
 * Words i[0]..i[13] of the candidate are copied (w0, w1, w2 and the first
 * two words of w3); w3[2] and w3[3] are handed over as zero. Work-items
 * whose id is at or beyond gid_max return immediately. combs_mode and
 * gid_max are not forwarded.
 */
__kernel void m17500_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max)
  {
    return;
  }

  // words 0..13 come from the candidate, the last two words stay zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  m17500s (w0, w1, w2, w3, pw_len,
           pws, rules_buf, combs_buf, bfs_buf, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
479
OpenCL/m17600_a0-optimized.cl
Normal file
479
OpenCL/m17600_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Round constants for the Iota step: keccakf_rndc[round] is XORed into
 * lane a00 at the end of every full round in the kernels of this file.
 * One entry per round, indexed by round number.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // round  0
  0x0000000000008082, // round  1
  0x800000000000808a, // round  2
  0x8000000080008000, // round  3
  0x000000000000808b, // round  4
  0x0000000080000001, // round  5
  0x8000000080008081, // round  6
  0x8000000000008009, // round  7
  0x000000000000008a, // round  8
  0x0000000000000088, // round  9
  0x0000000080008009, // round 10
  0x000000008000000a, // round 11
  0x000000008000808b, // round 12
  0x800000000000008b, // round 13
  0x8000000000008089, // round 14
  0x8000000000008003, // round 15
  0x8000000000008002, // round 16
  0x8000000000000080, // round 17
  0x000000000000800a, // round 18
  0x800000008000000a, // round 19
  0x8000000080008081, // round 20
  0x8000000000008080, // round 21
  0x0000000080000001, // round 22
  0x8000000080008008  // round 23
};

/**
 * Number of permutation rounds. Defaults to 24 unless KECCAK_ROUNDS has
 * already been defined before this point.
 */
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * Rule-based Keccak kernel for base words held in pws[gid].i[0..7].
 *
 * For every group of VECT_SIZE rules it applies the rules to the base word
 * (apply_rules_vect), appends the 0x06 byte at out_len (append_0x06_2x4_VV),
 * loads w0/w1 into lanes a00..a03, sets the top bit of lane a13, runs
 * KECCAK_ROUNDS - 1 full rounds followed by a reduced final round, and hands
 * the 32-bit halves of a03 and a02 to COMPARE_M_SIMD.
 *
 * NOTE(review): presumably SHA3-512 (0x06 suffix, final pad bit at the end of
 * the 9th lane when lanes are counted a00..a04, a10..a13, i.e. a 72-byte
 * rate) -- confirm against the host-side module for mode 17600.
 * NOTE(review): lid, w2 and w3 are never referenced by name in this body;
 * check whether a macro expanded here needs them before removing.
 */
__kernel void m17600_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
// private copy of the first eight words of the base word; re-used for every rule
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
// one iteration handles VECT_SIZE rules starting at il_pos
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32x w0[4] = { 0 };
|
||||
u32x w1[4] = { 0 };
|
||||
u32x w2[4] = { 0 };
|
||||
u32x w3[4] = { 0 };
|
||||
|
||||
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
// place the 0x06 byte directly behind the rule-modified word
append_0x06_2x4_VV (w0, w1, out_len);
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// state: 25 64-bit lanes; only a00..a03 carry message words, a13 carries the final pad bit
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = 0;
|
||||
u64x a10 = 0;
|
||||
u64x a11 = 0;
|
||||
u64x a12 = 0;
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi: save lane ad in bc0, overwrite ad with the carried value t rotated left by r,
// then carry the saved lane forward in t. Relies on bc0 and t being in scope.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// all rounds except the last one are computed in full
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
// the chain starts from a01; each step depends on the previous one, so the order is fixed
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Final round, reduced: only lanes a02 and a03 are compared below, so Theta skips
// some lanes, the Rho Pi chain stops once a02 is written, Chi is evaluated for
// a02/a03 only, and there is no Iota step (Iota only touches a00).
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
// NOTE(review): a01 is read here although the reduced Theta above did not update it;
// the first Rho_Pi only stores rotl64 (t, 1) into a20, which is not read again -- confirm.
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
// reduced Chi: only the inputs needed for a02 and a03 are captured
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// the four 32-bit words handed to the compare macro come from a03 first, then a02
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Kernel with an empty body: it performs no work when launched.
 * NOTE(review): presumably kept only so that an entry point with this name
 * exists next to m17600_m04 -- confirm against the host-side kernel lookup.
 */
__kernel void m17600_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel with an empty body: it performs no work when launched.
 * NOTE(review): presumably kept only so that an entry point with this name
 * exists next to m17600_m04 -- confirm against the host-side kernel lookup.
 */
__kernel void m17600_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Rule-based Keccak kernel for base words held in pws[gid].i[0..7], using the
 * COMPARE_S_SIMD compare macro and a four-word search[] array loaded from
 * digests_buf[digests_offset].
 *
 * For every group of VECT_SIZE rules it applies the rules to the base word
 * (apply_rules_vect), appends the 0x06 byte at out_len (append_0x06_2x4_VV),
 * loads w0/w1 into lanes a00..a03, sets the top bit of lane a13, runs
 * KECCAK_ROUNDS - 1 full rounds followed by a reduced final round, and hands
 * the 32-bit halves of a03 and a02 to COMPARE_S_SIMD.
 *
 * NOTE(review): presumably SHA3-512 (0x06 suffix, final pad bit at the end of
 * the 9th lane when lanes are counted a00..a04, a10..a13, i.e. a 72-byte
 * rate) -- confirm against the host-side module for mode 17600.
 * NOTE(review): lid, w2 and w3 are never referenced by name in this body, and
 * search[] is only declared here; check which of them the compare macro
 * needs before removing anything.
 */
__kernel void m17600_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
// private copy of the first eight words of the base word; re-used for every rule
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
// the four digest words of the entry at digests_offset, picked via DGST_R0..DGST_R3
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
// one iteration handles VECT_SIZE rules starting at il_pos
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32x w0[4] = { 0 };
|
||||
u32x w1[4] = { 0 };
|
||||
u32x w2[4] = { 0 };
|
||||
u32x w3[4] = { 0 };
|
||||
|
||||
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
|
||||
|
||||
// place the 0x06 byte directly behind the rule-modified word
append_0x06_2x4_VV (w0, w1, out_len);
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// state: 25 64-bit lanes; only a00..a03 carry message words, a13 carries the final pad bit
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = 0;
|
||||
u64x a10 = 0;
|
||||
u64x a11 = 0;
|
||||
u64x a12 = 0;
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi: save lane ad in bc0, overwrite ad with the carried value t rotated left by r,
// then carry the saved lane forward in t. Relies on bc0 and t being in scope.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// all rounds except the last one are computed in full
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
// the chain starts from a01; each step depends on the previous one, so the order is fixed
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Final round, reduced: only lanes a02 and a03 are compared below, so Theta skips
// some lanes, the Rho Pi chain stops once a02 is written, Chi is evaluated for
// a02/a03 only, and there is no Iota step (Iota only touches a00).
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
// NOTE(review): a01 is read here although the reduced Theta above did not update it;
// the first Rho_Pi only stores rotl64 (t, 1) into a20, which is not read again -- confirm.
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
// reduced Chi: only the inputs needed for a02 and a03 are captured
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// the four 32-bit words handed to the compare macro come from a03 first, then a02
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * m17600_s08: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _s08 entry point exists
 * next to _s04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17600_s16: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _s16 entry point exists
 * next to _s04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m17600_a1-optimized.cl
Normal file
594
OpenCL/m17600_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak round constants, one 64-bit value per round. The kernels below XOR
 * keccakf_rndc[round] into lane a00 in their Iota step.
 */

__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // round  0
  0x0000000000008082, // round  1
  0x800000000000808a, // round  2
  0x8000000080008000, // round  3
  0x000000000000808b, // round  4
  0x0000000080000001, // round  5
  0x8000000080008081, // round  6
  0x8000000000008009, // round  7
  0x000000000000008a, // round  8
  0x0000000000000088, // round  9
  0x0000000080008009, // round 10
  0x000000008000000a, // round 11
  0x000000008000808b, // round 12
  0x800000000000008b, // round 13
  0x8000000000008089, // round 14
  0x8000000000008003, // round 15
  0x8000000000008002, // round 16
  0x8000000000000080, // round 17
  0x000000000000800a, // round 18
  0x800000008000000a, // round 19
  0x8000000080008081, // round 20
  0x8000000000008080, // round 21
  0x0000000080000001, // round 22
  0x8000000080008008  // round 23
};

/**
 * Number of Keccak rounds. Defaults to 24 unless KECCAK_ROUNDS was already
 * defined before this point.
 */

#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17600_m04: for the work item gid, loads the first 8 words of pws[gid],
 * merges them with every candidate read from combs_buf, runs a Keccak
 * permutation over the resulting state and hands four 32-bit result words to
 * COMPARE_M_SIMD.
 *
 * NOTE(review): no padding byte is appended inside this kernel; the input
 * words are presumably already padded by the caller - confirm.
 * NOTE(review): lid and pw_len are not referenced again in the visible body;
 * they may be consumed by the compare macro - confirm before removing.
 */
__kernel void m17600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
// only the first 8 words (i[0]..i[7]) of the left-hand word are loaded
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
// BASE_LEFT: the right word is offset by the left word's length; otherwise
// the left word is offset by the right word's length.
// NOTE(review): assumes switch_buffer_by_offset_le_VV shifts the buffer by
// the given byte offset - confirm against its definition.
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
// OR both halves together into one 16-word message block
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// lanes a00..a12 are built from pairs of message words, a13 is preset to
// 0x8000000000000000 and every remaining lane starts at zero
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): saves lane ad in bc0, overwrites ad with t rotated left by
// r, then carries the saved value forward in t for the next invocation
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// all rounds except the last run in full here; the last one follows the loop
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, reduced: only a02 and a03 are read afterwards. Theta leaves out
// lanes a40, a01, a11, a42, a14 and a24, the Rho Pi chain stops after a02,
// Chi is evaluated for a02 and a03 only, and no Iota step is applied.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// split a03 and a02 into the four 32-bit words handed to the compare macro
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * m17600_m08: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _m08 entry point exists
 * next to _m04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17600_m16: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _m16 entry point exists
 * next to _m04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17600_s04: for the work item gid, loads the first 8 words of pws[gid],
 * merges them with every candidate read from combs_buf, runs a Keccak
 * permutation over the resulting state and hands four 32-bit result words to
 * COMPARE_S_SIMD.
 *
 * NOTE(review): no padding byte is appended inside this kernel; the input
 * words are presumably already padded by the caller - confirm.
 * NOTE(review): lid, pw_len and search are not referenced again in the visible
 * body; they may be consumed by the compare macro - confirm before removing.
 */
__kernel void m17600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
// only the first 8 words (i[0]..i[7]) of the left-hand word are loaded
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
// the four reference words of the single digest at digests_offset
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
// BASE_LEFT: the right word is offset by the left word's length; otherwise
// the left word is offset by the right word's length.
// NOTE(review): assumes switch_buffer_by_offset_le_VV shifts the buffer by
// the given byte offset - confirm against its definition.
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
// OR both halves together into one 16-word message block
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// lanes a00..a12 are built from pairs of message words, a13 is preset to
// 0x8000000000000000 and every remaining lane starts at zero
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): saves lane ad in bc0, overwrites ad with t rotated left by
// r, then carries the saved value forward in t for the next invocation
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// all rounds except the last run in full here; the last one follows the loop
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round, reduced: only a02 and a03 are read afterwards. Theta leaves out
// lanes a40, a01, a11, a42, a14 and a24, the Rho Pi chain stops after a02,
// Chi is evaluated for a02 and a03 only, and no Iota step is applied.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// split a03 and a02 into the four 32-bit words handed to the compare macro
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * m17600_s08: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _s08 entry point exists
 * next to _s04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17600_s16: kernel entry point with an empty body; it performs no work.
 * NOTE(review): presumably a placeholder so that an _s16 entry point exists
 * next to _s04 - confirm against the host-side kernel loader.
 */
__kernel void m17600_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
696
OpenCL/m17600_a3-optimized.cl
Normal file
696
OpenCL/m17600_a3-optimized.cl
Normal file
@ -0,0 +1,696 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak round constants, one 64-bit value per round. The code below XORs
 * keccakf_rndc[round] into lane a00 in its Iota step.
 */

__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, // round  0
  0x0000000000008082, // round  1
  0x800000000000808a, // round  2
  0x8000000080008000, // round  3
  0x000000000000808b, // round  4
  0x0000000080000001, // round  5
  0x8000000080008081, // round  6
  0x8000000000008009, // round  7
  0x000000000000008a, // round  8
  0x0000000000000088, // round  9
  0x0000000080008009, // round 10
  0x000000008000000a, // round 11
  0x000000008000808b, // round 12
  0x800000000000008b, // round 13
  0x8000000000008089, // round 14
  0x8000000000008003, // round 15
  0x8000000000008002, // round 16
  0x8000000000000080, // round 17
  0x000000000000800a, // round 18
  0x800000008000000a, // round 19
  0x8000000080008081, // round 20
  0x8000000000008080, // round 21
  0x0000000080000001, // round 22
  0x8000000080008008  // round 23
};

/**
 * Number of Keccak rounds. Defaults to 24 unless KECCAK_ROUNDS was already
 * defined before this point.
 */

#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
DECLSPEC void m17600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
u32 w0l = w0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * Single-digest worker: for every bf_t entry, ORs it into the first
   * message word, runs the Keccak permutation on the 16 words w0..w3 and
   * hands the four result words to COMPARE_S_SIMD.
   */

  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * round-step helpers; all of them work on the locals bc0..bc4 and t
   */

  // Theta, one column: every listed lane absorbs lo ^ rotl64 (hi, 1)
  #define Theta_Col(lo,hi,x0,x1,x2,x3,x4) \
    t = (lo) ^ rotl64 ((hi), 1); \
    x0 ^= t; x1 ^= t; x2 ^= t; x3 ^= t; x4 ^= t;

  // Rho Pi, one hop: lane ad receives the rotated carry, its old value becomes the carry
  #define Rho_Pi(ad,r) \
    bc0 = ad; \
    ad = rotl64 (t, r); \
    t = bc0;

  // Chi, one row of five lanes
  #define Chi_Row(x0,x1,x2,x3,x4) \
    bc0 = x0; bc1 = x1; bc2 = x2; bc3 = x3; bc4 = x4; \
    x0 ^= ~bc1 & bc2; \
    x1 ^= ~bc2 & bc3; \
    x2 ^= ~bc3 & bc4; \
    x3 ^= ~bc4 & bc0; \
    x4 ^= ~bc0 & bc1;

  /**
   * loop
   */

  const u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    const u32x w0lr = w0l | w0r;

    /**
     * Keccak
     */

    // lanes filled from the 16 message words (first word taken from w0lr)
    u64x a00 = hl32_to_64 (w0[1], w0lr);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);

    // the lane right after the message words only has its top bit set
    u64x a13 = 0x8000000000000000;

    // everything else starts out zero
    u64x a14 = 0;
    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    // scratch words shared by all round steps
    u64x bc0;
    u64x bc1;
    u64x bc2;
    u64x bc3;
    u64x bc4;

    u64x t;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      Theta_Col (bc4, bc1, a00, a10, a20, a30, a40);
      Theta_Col (bc0, bc2, a01, a11, a21, a31, a41);
      Theta_Col (bc1, bc3, a02, a12, a22, a32, a42);
      Theta_Col (bc2, bc4, a03, a13, a23, a33, a43);
      Theta_Col (bc3, bc0, a04, a14, a24, a34, a44);

      // Rho Pi (order matters: each hop consumes the carry of the previous one)

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * last round, trimmed: only a02 and a03 are read afterwards, so some
     * Theta updates, the tail of the Rho Pi chain, most of Chi and Iota
     * are left out
     */

    // Theta

    bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    t = bc4 ^ rotl64 (bc1, 1);
    a00 ^= t;
    a10 ^= t;
    a20 ^= t;
    a30 ^= t;

    t = bc0 ^ rotl64 (bc2, 1);
    a21 ^= t;
    a31 ^= t;
    a41 ^= t;

    t = bc1 ^ rotl64 (bc3, 1);
    a02 ^= t;
    a12 ^= t;
    a22 ^= t;
    a32 ^= t;

    t = bc2 ^ rotl64 (bc4, 1);
    a03 ^= t;
    a13 ^= t;
    a23 ^= t;
    a33 ^= t;
    a43 ^= t;

    t = bc3 ^ rotl64 (bc0, 1);
    a04 ^= t;
    a34 ^= t;
    a44 ^= t;

    // Rho Pi, stopping once a02 has been written

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    // Chi, restricted to the two lanes that get compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }

  #undef Chi_Row
  #undef Rho_Pi
  #undef Theta_Col
}
|
||||
|
||||
__kernel void m17600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..3 of this work-item's pws entry,
   * leaves the other twelve message words zero and calls m17600m.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
__kernel void m17600_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..7 of this work-item's pws entry,
   * leaves the other eight message words zero and calls m17600m.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] =
  {
    pws[gid].i[ 4],
    pws[gid].i[ 5],
    pws[gid].i[ 6],
    pws[gid].i[ 7]
  };

  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
__kernel void m17600_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..13 of this work-item's pws entry,
   * leaves the last two message words zero and calls m17600m.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] =
  {
    pws[gid].i[ 4],
    pws[gid].i[ 5],
    pws[gid].i[ 6],
    pws[gid].i[ 7]
  };

  u32 w2[4] =
  {
    pws[gid].i[ 8],
    pws[gid].i[ 9],
    pws[gid].i[10],
    pws[gid].i[11]
  };

  // only the first two words of the last quad come from the candidate
  u32 w3[4] =
  {
    pws[gid].i[12],
    pws[gid].i[13],
    0,
    0
  };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
__kernel void m17600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..3 of this work-item's pws entry,
   * leaves the other twelve message words zero and calls m17600s.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
__kernel void m17600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..7 of this work-item's pws entry,
   * leaves the other eight message words zero and calls m17600s.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] =
  {
    pws[gid].i[ 4],
    pws[gid].i[ 5],
    pws[gid].i[ 6],
    pws[gid].i[ 7]
  };

  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
__kernel void m17600_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Entry point that copies words 0..13 of this work-item's pws entry,
   * leaves the last two message words zero and calls m17600s.
   */

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 w0[4] =
  {
    pws[gid].i[ 0],
    pws[gid].i[ 1],
    pws[gid].i[ 2],
    pws[gid].i[ 3]
  };

  u32 w1[4] =
  {
    pws[gid].i[ 4],
    pws[gid].i[ 5],
    pws[gid].i[ 6],
    pws[gid].i[ 7]
  };

  u32 w2[4] =
  {
    pws[gid].i[ 8],
    pws[gid].i[ 9],
    pws[gid].i[10],
    pws[gid].i[11]
  };

  // only the first two words of the last quad come from the candidate
  u32 w3[4] =
  {
    pws[gid].i[12],
    pws[gid].i[13],
    0,
    0
  };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
479
OpenCL/m17700_a0-optimized.cl
Normal file
479
OpenCL/m17700_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants; entry [round] is XORed into lane a00 by the Iota step.
 */

__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082, // rounds  0,  1
  0x800000000000808a, 0x8000000080008000, // rounds  2,  3
  0x000000000000808b, 0x0000000080000001, // rounds  4,  5
  0x8000000080008081, 0x8000000000008009, // rounds  6,  7
  0x000000000000008a, 0x0000000000000088, // rounds  8,  9
  0x0000000080008009, 0x000000008000000a, // rounds 10, 11
  0x000000008000808b, 0x800000000000008b, // rounds 12, 13
  0x8000000000008089, 0x8000000000008003, // rounds 14, 15
  0x8000000000008002, 0x8000000000000080, // rounds 16, 17
  0x000000000000800a, 0x800000008000000a, // rounds 18, 19
  0x8000000080008081, 0x8000000000008080, // rounds 20, 21
  0x0000000080000001, 0x8000000080008008  // rounds 22, 23
};

/**
 * Round count used by the kernels below; a definition made before this
 * point takes precedence over the default of 24.
 */

#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
__kernel void m17700_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * Rule-based multi-digest kernel: applies each rule to the first eight
   * words of the candidate, appends 0x01, runs the Keccak permutation and
   * hands the result words to COMPARE_M_SIMD.
   */

  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max do nothing
  if (gid >= gid_max) return;

  u32 pw_buf0[4] =
  {
    pws[gid].i[0],
    pws[gid].i[1],
    pws[gid].i[2],
    pws[gid].i[3]
  };

  u32 pw_buf1[4] =
  {
    pws[gid].i[4],
    pws[gid].i[5],
    pws[gid].i[6],
    pws[gid].i[7]
  };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * round-step helpers; all of them work on the locals bc0..bc4 and t
   */

  // Theta, one column: every listed lane absorbs lo ^ rotl64 (hi, 1)
  #define Theta_Col(lo,hi,x0,x1,x2,x3,x4) \
    t = (lo) ^ rotl64 ((hi), 1); \
    x0 ^= t; x1 ^= t; x2 ^= t; x3 ^= t; x4 ^= t;

  // Rho Pi, one hop: lane ad receives the rotated carry, its old value becomes the carry
  #define Rho_Pi(ad,r) \
    bc0 = ad; \
    ad = rotl64 (t, r); \
    t = bc0;

  // Chi, one row of five lanes
  #define Chi_Row(x0,x1,x2,x3,x4) \
    bc0 = x0; bc1 = x1; bc2 = x2; bc3 = x3; bc4 = x4; \
    x0 ^= ~bc1 & bc2; \
    x1 ^= ~bc2 & bc3; \
    x2 ^= ~bc3 & bc4; \
    x3 ^= ~bc4 & bc0; \
    x4 ^= ~bc0 & bc1;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // lanes filled from the eight words produced by the rule engine
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);

    // all other lanes start at zero, except a32 which has its top bit set
    u64x a04 = 0;
    u64x a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    // scratch words shared by all round steps
    u64x bc0;
    u64x bc1;
    u64x bc2;
    u64x bc3;
    u64x bc4;

    u64x t;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      Theta_Col (bc4, bc1, a00, a10, a20, a30, a40);
      Theta_Col (bc0, bc2, a01, a11, a21, a31, a41);
      Theta_Col (bc1, bc3, a02, a12, a22, a32, a42);
      Theta_Col (bc2, bc4, a03, a13, a23, a33, a43);
      Theta_Col (bc3, bc0, a04, a14, a24, a34, a44);

      // Rho Pi (order matters: each hop consumes the carry of the previous one)

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * last round, trimmed: only a02 and a03 are read afterwards, so some
     * Theta updates, the tail of the Rho Pi chain, most of Chi and Iota
     * are left out
     */

    // Theta

    bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    t = bc4 ^ rotl64 (bc1, 1);
    a00 ^= t;
    a10 ^= t;
    a20 ^= t;
    a30 ^= t;

    t = bc0 ^ rotl64 (bc2, 1);
    a21 ^= t;
    a31 ^= t;
    a41 ^= t;

    t = bc1 ^ rotl64 (bc3, 1);
    a02 ^= t;
    a12 ^= t;
    a22 ^= t;
    a32 ^= t;

    t = bc2 ^ rotl64 (bc4, 1);
    a03 ^= t;
    a13 ^= t;
    a23 ^= t;
    a33 ^= t;
    a43 ^= t;

    t = bc3 ^ rotl64 (bc0, 1);
    a04 ^= t;
    a34 ^= t;
    a44 ^= t;

    // Rho Pi, stopping once a02 has been written

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    // Chi, restricted to the two lanes that get compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    // the upper half of a03 is not compared; r1 is a constant zero here
    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }

  #undef Chi_Row
  #undef Rho_Pi
  #undef Theta_Col
}
|
||||
|
||||
__kernel void m17700_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17700_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17700_s04: rule-based Keccak kernel, single-digest compare.
 *
 * Each work item loads one base word (8 x u32) from pws[gid], then for every
 * il_pos applies a rule via apply_rules_vect(), appends the 0x01 pad byte,
 * runs the Keccak-f permutation and hands four 32-bit result words to
 * COMPARE_S_SIMD.
 *
 * The closing pad bit sits in lane a32 (lane index 17); presumably this is
 * the Keccak-224 rate - TODO confirm against the host-side mode definition.
 */
__kernel void m17700_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  // not referenced directly in this body; kept as in the original
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_len = pws[gid].pw_len;

  /**
   * digest
   */

  // not referenced directly below; presumably consumed by COMPARE_S_SIMD
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // the first four lanes carry the padded candidate, everything else is zero
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;
    u64x a10 = 0;
    u64x a11 = 0;
    u64x a12 = 0;
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000; // top bit of lane 17: closing pad bit
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One step of the Rho/Pi lane chain: the value carried in t is rotated
    // into ad, and the previous content of ad becomes the next carried value.
    // Uses bc0 and t from the enclosing scope.
    #define Rho_Pi(ad,r)     \
      do                     \
      {                      \
        bc0 = ad;            \
        ad  = rotl64 (t, r); \
        t   = bc0;           \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, specialised: only a02 and a03 are compared below, so lane
    // updates that cannot reach them are left out and Iota is skipped.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    // compared words: low half of a03, a constant zero, then both halves of a02
    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
__kernel void m17700_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17700_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m17700_a1-optimized.cl
Normal file
594
OpenCL/m17700_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants XORed into lane a00 during the Iota step,
 * indexed by round number.
 */
__constant u64a keccakf_rndc[24] =
{
  /* rounds  0 ..  3 */ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
  /* rounds  4 ..  7 */ 0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
  /* rounds  8 .. 11 */ 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
  /* rounds 12 .. 15 */ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
  /* rounds 16 .. 19 */ 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
  /* rounds 20 .. 23 */ 0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};

/* Number of permutation rounds; may be overridden before this point. */
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17700_m04: combinator Keccak kernel, multi-digest compare.
 *
 * Each work item loads one left word (8 x u32) from pws[gid]. For every
 * il_pos it fetches the right word from combs_buf, shifts one of the two by
 * the length of the other (which one depends on combs_mode), ORs them into
 * w0..w3, runs the Keccak-f permutation and hands four 32-bit result words
 * to COMPARE_M_SIMD.
 *
 * NOTE(review): no 0x01 pad byte is appended inside this kernel; presumably
 * the host places it in the combined word - confirm.
 */
__kernel void m17700_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  // not referenced directly in this body; kept as in the original
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    // not referenced directly below; kept as in the original
    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // shift whichever word comes second by the length of the one that comes first
    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * Keccak
     */

    // the first eight lanes carry the combined candidate, everything else is zero
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000; // top bit of lane 17: closing pad bit
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One step of the Rho/Pi lane chain: the value carried in t is rotated
    // into ad, and the previous content of ad becomes the next carried value.
    // Uses bc0 and t from the enclosing scope.
    #define Rho_Pi(ad,r)     \
      do                     \
      {                      \
        bc0 = ad;            \
        ad  = rotl64 (t, r); \
        t   = bc0;           \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, specialised: only a02 and a03 are compared below, so lane
    // updates that cannot reach them are left out and Iota is skipped.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    // compared words: low half of a03, a constant zero, then both halves of a02
    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
|
||||
__kernel void m17700_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17700_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17700_s04: combinator Keccak kernel, single-digest compare.
 *
 * Each work item loads one left word (8 x u32) from pws[gid]. For every
 * il_pos it fetches the right word from combs_buf, shifts one of the two by
 * the length of the other (which one depends on combs_mode), ORs them into
 * w0..w3, runs the Keccak-f permutation and hands four 32-bit result words
 * to COMPARE_S_SIMD.
 *
 * NOTE(review): no 0x01 pad byte is appended inside this kernel; presumably
 * the host places it in the combined word - confirm.
 */
__kernel void m17700_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  // not referenced directly in this body; kept as in the original
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * digest
   */

  // not referenced directly below; presumably consumed by COMPARE_S_SIMD
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    // not referenced directly below; kept as in the original
    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // shift whichever word comes second by the length of the one that comes first
    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * Keccak
     */

    // the first eight lanes carry the combined candidate, everything else is zero
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000; // top bit of lane 17: closing pad bit
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // One step of the Rho/Pi lane chain: the value carried in t is rotated
    // into ad, and the previous content of ad becomes the next carried value.
    // Uses bc0 and t from the enclosing scope.
    #define Rho_Pi(ad,r)     \
      do                     \
      {                      \
        bc0 = ad;            \
        ad  = rotl64 (t, r); \
        t   = bc0;           \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round, specialised: only a02 and a03 are compared below, so lane
    // updates that cannot reach them are left out and Iota is skipped.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    // compared words: low half of a03, a constant zero, then both halves of a02
    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point m17700_s08. The body is empty, so launching this kernel
 * performs no work and touches none of its buffers.
 * NOTE(review): presumably kept as a stub so that every expected kernel name
 * exists in this file - confirm against the host-side kernel lookup.
 */
__kernel void m17700_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point m17700_s16. The body is empty, so launching this kernel
 * performs no work and touches none of its buffers.
 * NOTE(review): presumably kept as a stub so that every expected kernel name
 * exists in this file - confirm against the host-side kernel lookup.
 */
__kernel void m17700_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
695
OpenCL/m17700_a3-optimized.cl
Normal file
695
OpenCL/m17700_a3-optimized.cl
Normal file
@ -0,0 +1,695 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Table of 24 64-bit constants, indexed by round number.
 * The round loops in this file XOR keccakf_rndc[round] into lane a00 in the
 * step they label "Iota".
 */
__constant u64a keccakf_rndc[24] =
|
||||
{
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
/**
 * Round count used by the permutation below. Defaults to 24 unless
 * KECCAK_ROUNDS was already defined before this point.
 * The round loops in this file run KECCAK_ROUNDS - 1 iterations and are
 * followed by a separately written, reduced final round.
 */
#ifndef KECCAK_ROUNDS
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
/**
 * Multi-hash worker shared by the _m04/_m08/_m16 kernels in this file.
 *
 * For every candidate produced by ix_create_bft () the first message word is
 * formed by OR-ing the generated bits into w0[0]. The sixteen message words
 * are loaded into the first eight 64-bit lanes of a 25-lane state, the state
 * is permuted, and four 32-bit result words are handed to COMPARE_M_SIMD.
 *
 * w0..w3  : four-word message blocks prepared by the calling kernel
 * il_cnt  : number of candidates to iterate, stepped by VECT_SIZE
 * The remaining parameters are not read directly in this body; they are
 * presumably consumed by COMPARE_M_SIMD - confirm against inc_simd.cl.
 */
DECLSPEC void m17300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * loop
   */

  // part of the first message word that stays the same for every candidate
  const u32 w0_fixed = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0_var = ix_create_bft (bfs_buf, il_pos);

    const u32x w0_full = w0_fixed | w0_var;

    /**
     * Keccak
     */

    // message words fill the first eight lanes, two 32-bit words per lane
    u64x a00 = hl32_to_64 (w0[1], w0_full);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;

    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;

    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000; // NOTE(review): presumably the closing padding bit of the rate block - confirm
    u64x a33 = 0;
    u64x a34 = 0;

    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one link of the rotation chain: lane "ad" receives the rotated carry "t"
    // and its previous value becomes the next carry (bc0 is used as scratch)
    #define Rho_Pi(ad,r)      \
    {                         \
      bc0 = ad;               \
      ad  = rotl64 (t, r);    \
      t   = bc0;              \
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; ++round)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1);
      a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;

      t = bc0 ^ rotl64 (bc2, 1);
      a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;

      t = bc1 ^ rotl64 (bc3, 1);
      a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;

      t = bc2 ^ rotl64 (bc4, 1);
      a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

      t = bc3 ^ rotl64 (bc0, 1);
      a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= (~bc1 & bc2); a01 ^= (~bc2 & bc3); a02 ^= (~bc3 & bc4); a03 ^= (~bc4 & bc0); a04 ^= (~bc0 & bc1);

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= (~bc1 & bc2); a11 ^= (~bc2 & bc3); a12 ^= (~bc3 & bc4); a13 ^= (~bc4 & bc0); a14 ^= (~bc0 & bc1);

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= (~bc1 & bc2); a21 ^= (~bc2 & bc3); a22 ^= (~bc3 & bc4); a23 ^= (~bc4 & bc0); a24 ^= (~bc0 & bc1);

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= (~bc1 & bc2); a31 ^= (~bc2 & bc3); a32 ^= (~bc3 & bc4); a33 ^= (~bc4 & bc0); a34 ^= (~bc0 & bc1);

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= (~bc1 & bc2); a41 ^= (~bc2 & bc3); a42 ^= (~bc3 & bc4); a43 ^= (~bc4 & bc0); a44 ^= (~bc0 & bc1);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * Final round, written out separately: Theta leaves some lanes untouched,
     * the rotation chain stops after a02, and only a02 / a03 go through Chi.
     * There is no Iota step here.
     */

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1);
    a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;

    t = bc0 ^ rotl64 (bc2, 1);
    a21 ^= t; a31 ^= t; a41 ^= t;

    t = bc1 ^ rotl64 (bc3, 1);
    a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;

    t = bc2 ^ rotl64 (bc4, 1);
    a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

    t = bc3 ^ rotl64 (bc0, 1);
    a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that feed the comparison

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= (~bc3 & bc4);
    a03 ^= (~bc4 & bc0);

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Single-hash worker shared by the _s04/_s08/_s16 kernels in this file.
 *
 * The four target words are read once from digests_buf[digests_offset] into
 * "search". For every candidate produced by ix_create_bft () the first
 * message word is formed by OR-ing the generated bits into w0[0]. The sixteen
 * message words are loaded into the first eight 64-bit lanes of a 25-lane
 * state, the state is permuted, and four 32-bit result words are handed to
 * COMPARE_S_SIMD.
 *
 * w0..w3  : four-word message blocks prepared by the calling kernel
 * il_cnt  : number of candidates to iterate, stepped by VECT_SIZE
 * The remaining parameters are not read directly in this body; they are
 * presumably consumed by COMPARE_S_SIMD - confirm against inc_simd.cl.
 */
DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * digest
   */

  // the name "search" is kept as-is: COMPARE_S_SIMD is expected to read it
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  // part of the first message word that stays the same for every candidate
  const u32 w0_fixed = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0_var = ix_create_bft (bfs_buf, il_pos);

    const u32x w0_full = w0_fixed | w0_var;

    /**
     * Keccak
     */

    // message words fill the first eight lanes, two 32-bit words per lane
    u64x a00 = hl32_to_64 (w0[1], w0_full);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;

    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;

    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0x8000000000000000; // NOTE(review): presumably the closing padding bit of the rate block - confirm
    u64x a33 = 0;
    u64x a34 = 0;

    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one link of the rotation chain: lane "ad" receives the rotated carry "t"
    // and its previous value becomes the next carry (bc0 is used as scratch)
    #define Rho_Pi(ad,r)      \
    {                         \
      bc0 = ad;               \
      ad  = rotl64 (t, r);    \
      t   = bc0;              \
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; ++round)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1);
      a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;

      t = bc0 ^ rotl64 (bc2, 1);
      a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;

      t = bc1 ^ rotl64 (bc3, 1);
      a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;

      t = bc2 ^ rotl64 (bc4, 1);
      a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

      t = bc3 ^ rotl64 (bc0, 1);
      a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= (~bc1 & bc2); a01 ^= (~bc2 & bc3); a02 ^= (~bc3 & bc4); a03 ^= (~bc4 & bc0); a04 ^= (~bc0 & bc1);

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= (~bc1 & bc2); a11 ^= (~bc2 & bc3); a12 ^= (~bc3 & bc4); a13 ^= (~bc4 & bc0); a14 ^= (~bc0 & bc1);

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= (~bc1 & bc2); a21 ^= (~bc2 & bc3); a22 ^= (~bc3 & bc4); a23 ^= (~bc4 & bc0); a24 ^= (~bc0 & bc1);

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= (~bc1 & bc2); a31 ^= (~bc2 & bc3); a32 ^= (~bc3 & bc4); a33 ^= (~bc4 & bc0); a34 ^= (~bc0 & bc1);

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= (~bc1 & bc2); a41 ^= (~bc2 & bc3); a42 ^= (~bc3 & bc4); a43 ^= (~bc4 & bc0); a44 ^= (~bc0 & bc1);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * Final round, written out separately: Theta leaves some lanes untouched,
     * the rotation chain stops after a02, and only a02 / a03 go through Chi.
     * There is no Iota step here.
     */

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1);
    a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;

    t = bc0 ^ rotl64 (bc2, 1);
    a21 ^= t; a31 ^= t; a41 ^= t;

    t = bc1 ^ rotl64 (bc3, 1);
    a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;

    t = bc2 ^ rotl64 (bc4, 1);
    a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

    t = bc3 ^ rotl64 (bc0, 1);
    a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that feed the comparison

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= (~bc3 & bc4);
    a03 ^= (~bc4 & bc0);

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = 0;
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Multi-hash kernel entry point, m04 variant.
 * Loads password words 0..3 of work item "gid" into w0, leaves w1..w3 zeroed
 * and forwards everything to m17300m (). Work items with gid >= gid_max exit
 * immediately.
 */
__kernel void m17700_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // only the first block carries password data in this variant
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Multi-hash kernel entry point, m08 variant.
 * Loads password words 0..7 of work item "gid" into w0 and w1, leaves w2 and
 * w3 zeroed and forwards everything to m17300m (). Work items with
 * gid >= gid_max exit immediately.
 */
__kernel void m17700_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // the first two blocks carry password data in this variant
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Multi-hash kernel entry point, m16 variant.
 * Loads password words 0..13 of work item "gid" into w0..w3 (the last two
 * words of w3 stay zero) and forwards everything to m17300m (). Work items
 * with gid >= gid_max exit immediately.
 */
__kernel void m17700_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // all four blocks carry password data; w3 is only half filled
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Single-hash kernel entry point, s04 variant.
 * Loads password words 0..3 of work item "gid" into w0, leaves w1..w3 zeroed
 * and forwards everything to m17300s (). Work items with gid >= gid_max exit
 * immediately.
 */
__kernel void m17700_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // only the first block carries password data in this variant
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Single-hash kernel entry point, s08 variant.
 * Loads password words 0..7 of work item "gid" into w0 and w1, leaves w2 and
 * w3 zeroed and forwards everything to m17300s (). Work items with
 * gid >= gid_max exit immediately.
 */
__kernel void m17700_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // the first two blocks carry password data in this variant
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Single-hash kernel entry point, s16 variant.
 * Loads password words 0..13 of work item "gid" into w0..w3 (the last two
 * words of w3 stay zero) and forwards everything to m17300s (). Work items
 * with gid >= gid_max exit immediately.
 */
__kernel void m17700_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // all four blocks carry password data; w3 is only half filled
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
@ -30,7 +30,7 @@ __constant u64a keccakf_rndc[24] =
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
__kernel void m05000_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -240,15 +240,15 @@ __kernel void m05000_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m05000_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m05000_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m05000_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -470,10 +470,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m05000_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m05000_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m17800_a1-optimized.cl
Normal file
594
OpenCL/m17800_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants of the Keccak-f[1600] permutation, one 64-bit value per
 * round. The kernels in this file XOR keccakf_rndc[round] into lane a00 in
 * their Iota step.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001,
  0x0000000000008082,
  0x800000000000808a,
  0x8000000080008000,
  0x000000000000808b,
  0x0000000080000001,
  0x8000000080008081,
  0x8000000000008009,
  0x000000000000008a,
  0x0000000000000088,
  0x0000000080008009,
  0x000000008000000a,
  0x000000008000808b,
  0x800000000000008b,
  0x8000000000008089,
  0x8000000000008003,
  0x8000000000008002,
  0x8000000000000080,
  0x000000000000800a,
  0x800000008000000a,
  0x8000000080008081,
  0x8000000000008080,
  0x0000000080000001,
  0x8000000080008008
};

// Number of permutation rounds; a definition supplied before this point wins.

#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * m17800_m04: combinator-mode kernel.
 *
 * For every il_pos in [0, il_cnt), stepping by VECT_SIZE, the left word (first
 * 8 words of pws[gid]) and the right word (first 8 words taken from combs_buf)
 * are merged into one 16-word message, the message is loaded into a 25-lane
 * 64-bit state, the Keccak rounds are applied, and the low/high halves of
 * lanes a03 and a02 are handed to COMPARE_M_SIMD.
 *
 * Work-items whose global id is at or beyond gid_max return immediately.
 */
__kernel void m17800_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // left-hand word: the first 8 words of this work-item's pws entry

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // exactly one of the two halves is shifted, by the length of the other
    // half, so that the two can be OR-ed together below

    if (combs_mode != COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }

    u32x w0[4] = { wordl0[0] | wordr0[0], wordl0[1] | wordr0[1], wordl0[2] | wordr0[2], wordl0[3] | wordr0[3] };
    u32x w1[4] = { wordl1[0] | wordr1[0], wordl1[1] | wordr1[1], wordl1[2] | wordr1[2], wordl1[3] | wordr1[3] };
    u32x w2[4] = { wordl2[0] | wordr2[0], wordl2[1] | wordr2[1], wordl2[2] | wordr2[2], wordl2[3] | wordr2[3] };
    u32x w3[4] = { wordl3[0] | wordr3[0], wordl3[1] | wordr3[1], wordl3[2] | wordr3[2], wordl3[3] | wordr3[3] };

    /**
     * Keccak
     */

    // the 16 message words fill the first eight lanes (odd word = high half)

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    u64x a22 = 0;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0x8000000000000000; // NOTE(review): presumably the closing padding bit of the rate block - confirm
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one Rho/Pi step: lane `ad` receives the carried value t rotated by r,
    // and the previous content of `ad` becomes the new carried value

    #define Rho_Pi(ad,r)     \
      bc0 = ad;              \
      ad = rotl64 (t, r);    \
      t = bc0;

    // Chi applied to one row of five lanes

    #define Chi_Row(x0,x1,x2,x3,x4)                             \
      bc0 = x0; bc1 = x1; bc2 = x2; bc3 = x3; bc4 = x4;         \
      x0 ^= ~bc1 & bc2;                                         \
      x1 ^= ~bc2 & bc3;                                         \
      x2 ^= ~bc3 & bc4;                                         \
      x3 ^= ~bc4 & bc0;                                         \
      x4 ^= ~bc0 & bc1;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int rnd = 0; rnd < KECCAK_ROUNDS - 1; rnd++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1);
      a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;

      t = bc0 ^ rotl64 (bc2, 1);
      a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;

      t = bc1 ^ rotl64 (bc3, 1);
      a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;

      t = bc2 ^ rotl64 (bc4, 1);
      a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

      t = bc3 ^ rotl64 (bc0, 1);
      a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[rnd];
    }

    // Last round, written out in reduced form: several lane updates that the
    // loop body performs are left out, Rho Pi stops after a02, Chi is only
    // evaluated for a02 and a03, and no round constant is applied. Only a02
    // and a03 are read afterwards.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1);
    a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;

    t = bc0 ^ rotl64 (bc2, 1);
    a21 ^= t; a31 ^= t; a41 ^= t;

    t = bc1 ^ rotl64 (bc3, 1);
    a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;

    t = bc2 ^ rotl64 (bc4, 1);
    a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;

    t = bc3 ^ rotl64 (bc0, 1);
    a04 ^= t; a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Chi_Row
    #undef Rho_Pi

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
|
||||
/**
 * m17800_m08: entry point with the same parameter list as m17800_m04 but an
 * empty body, so it performs no work.
 * NOTE(review): presumably kept only so the kernel name exists for the host
 * side - confirm against the code that looks kernels up by name.
 */
__kernel void m17800_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17800_m16: entry point with the same parameter list as m17800_m04 but an
 * empty body, so it performs no work.
 * NOTE(review): presumably kept only so the kernel name exists for the host
 * side - confirm against the code that looks kernels up by name.
 */
__kernel void m17800_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17800_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0x8000000000000000;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * m17800_s08: kernel entry point with an empty body. It takes the same
 * parameter list as the other kernels in this file but reads none of its
 * arguments and writes nothing.
 * NOTE(review): presumably a deliberate placeholder so that the symbol
 * exists for the host side even though this variant has no implementation;
 * confirm against the host-side kernel lookup.
 */
__kernel void m17800_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * m17800_s16: kernel entry point with an empty body. It takes the same
 * parameter list as the other kernels in this file but reads none of its
 * arguments and writes nothing.
 * NOTE(review): presumably a deliberate placeholder so that the symbol
 * exists for the host side even though this variant has no implementation;
 * confirm against the host-side kernel lookup.
 */
__kernel void m17800_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
@ -28,7 +28,7 @@ __constant u64a keccakf_rndc[24] =
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
DECLSPEC void m05000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
DECLSPEC void m17400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -214,7 +214,7 @@ DECLSPEC void m05000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g
|
||||
}
|
||||
}
|
||||
|
||||
DECLSPEC void m05000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -412,7 +412,7 @@ DECLSPEC void m05000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -456,10 +456,10 @@ __kernel void m05000_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
||||
|
||||
__kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -503,10 +503,10 @@ __kernel void m05000_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
||||
|
||||
__kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -550,10 +550,10 @@ __kernel void m05000_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
||||
|
||||
__kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -597,10 +597,10 @@ __kernel void m05000_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
||||
|
||||
__kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -644,10 +644,10 @@ __kernel void m05000_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
||||
|
||||
__kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
__kernel void m17800_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -691,5 +691,5 @@ __kernel void m05000_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
|
||||
* main
|
||||
*/
|
||||
|
||||
m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
||||
}
|
479
OpenCL/m17900_a0-optimized.cl
Normal file
479
OpenCL/m17900_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Keccak-f[1600] round constants: 24 64-bit words, one per permutation round.
 * The values are the standard RC[0..23] table from the Keccak / FIPS 202
 * specification. The table is indexed by round number in the Iota step
 * (a00 ^= keccakf_rndc[round]), as done in the sibling Keccak kernels.
 */
__constant u64a keccakf_rndc[24] =
|
||||
{
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
/*
 * Round count used by the permutation loop. The #ifndef guard lets a build
 * supply its own value; the default of 24 equals the number of entries in
 * keccakf_rndc.
 */
#ifndef KECCAK_ROUNDS
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
__kernel void m17900_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * work-item ids; items at or beyond gid_max have nothing to do
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base word: the first eight 32-bit words of pws[gid] and its length
   */

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * one iteration per group of VECT_SIZE rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    // apply the rules to the base word (result in w0/w1), then add the 0x01
    // marker at the resulting length
    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak state: lanes a00..a03 carry the message words, every other lane
     * starts at zero except a22 which has its top bit set
     * NOTE(review): presumably the closing padding bit of the rate block - confirm
     */

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0; u64x a11 = 0; u64x a12 = 0; u64x a13 = 0; u64x a14 = 0;

    u64x a20 = 0; u64x a21 = 0;
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0; u64x a24 = 0;

    u64x a30 = 0; u64x a31 = 0; u64x a32 = 0; u64x a33 = 0; u64x a34 = 0;

    u64x a40 = 0; u64x a41 = 0; u64x a42 = 0; u64x a43 = 0; u64x a44 = 0;

    // one Rho/Pi step: lane `ad` receives the rotated carried value and its
    // previous content becomes the new carried value
    #define Rho_Pi(ad,r)            \
      hold  = ad;                   \
      ad    = rotl64 (carry, r);    \
      carry = hold;

    // Chi on one row of five lanes, evaluated from a snapshot of the row
    #define Chi_Row(x0,x1,x2,x3,x4)                  \
      b0 = x0; b1 = x1; b2 = x2; b3 = x3; b4 = x4;   \
      x0 ^= ~b1 & b2;                                \
      x1 ^= ~b2 & b3;                                \
      x2 ^= ~b3 & b4;                                \
      x3 ^= ~b4 & b0;                                \
      x4 ^= ~b0 & b1;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta: column parities, then one mixing word per column

      const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      const u64x d0 = p4 ^ rotl64 (p1, 1);
      const u64x d1 = p0 ^ rotl64 (p2, 1);
      const u64x d2 = p1 ^ rotl64 (p3, 1);
      const u64x d3 = p2 ^ rotl64 (p4, 1);
      const u64x d4 = p3 ^ rotl64 (p0, 1);

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi: a chain of rotate-and-move steps starting from a01

      u64x carry = a01;
      u64x hold;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi, row by row

      u64x b0;
      u64x b1;
      u64x b2;
      u64x b3;
      u64x b4;

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * last round, reduced: only a subset of the lanes is updated, only a02
     * and a03 go through Chi, and no Iota constant is applied here
     */

    // Theta (partial)

    const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = p4 ^ rotl64 (p1, 1);
    const u64x d1 = p0 ^ rotl64 (p2, 1);
    const u64x d2 = p1 ^ rotl64 (p3, 1);
    const u64x d3 = p2 ^ rotl64 (p4, 1);
    const u64x d4 = p3 ^ rotl64 (p0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
    a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4; a34 ^= d4; a44 ^= d4;

    // Rho Pi (chain stops once a02 has been produced)

    u64x carry = a01;
    u64x hold;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi
    #undef Chi_Row

    // Chi for a02 and a03 only; a02 is updated first so it still sees the
    // pre-Chi value of a03, and a00/a04 are never modified here
    a02 ^= ~a03 & a04;
    a03 ^= ~a04 & a00;

    /**
     * hand the four 32-bit halves of a03/a02 to the multi-hash comparison
     */

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_m04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _m08
 * symbol even though this optimized kernel only implements _m04 - confirm.
 */
__kernel void m17900_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_m04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _m16
 * symbol even though this optimized kernel only implements _m04 - confirm.
 */
__kernel void m17900_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
__kernel void m17900_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * work-item ids; items at or beyond gid_max have nothing to do
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base word: the first eight 32-bit words of pws[gid] and its length
   */

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * the single target digest, in DGST_R0..DGST_R3 order
   */

  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * one iteration per group of VECT_SIZE rules
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    // apply the rules to the base word (result in w0/w1), then add the 0x01
    // marker at the resulting length
    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak state: lanes a00..a03 carry the message words, every other lane
     * starts at zero except a22 which has its top bit set
     * NOTE(review): presumably the closing padding bit of the rate block - confirm
     */

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0; u64x a11 = 0; u64x a12 = 0; u64x a13 = 0; u64x a14 = 0;

    u64x a20 = 0; u64x a21 = 0;
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0; u64x a24 = 0;

    u64x a30 = 0; u64x a31 = 0; u64x a32 = 0; u64x a33 = 0; u64x a34 = 0;

    u64x a40 = 0; u64x a41 = 0; u64x a42 = 0; u64x a43 = 0; u64x a44 = 0;

    // one Rho/Pi step: lane `ad` receives the rotated carried value and its
    // previous content becomes the new carried value
    #define Rho_Pi(ad,r)            \
      hold  = ad;                   \
      ad    = rotl64 (carry, r);    \
      carry = hold;

    // Chi on one row of five lanes, evaluated from a snapshot of the row
    #define Chi_Row(x0,x1,x2,x3,x4)                  \
      b0 = x0; b1 = x1; b2 = x2; b3 = x3; b4 = x4;   \
      x0 ^= ~b1 & b2;                                \
      x1 ^= ~b2 & b3;                                \
      x2 ^= ~b3 & b4;                                \
      x3 ^= ~b4 & b0;                                \
      x4 ^= ~b0 & b1;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta: column parities, then one mixing word per column

      const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      const u64x d0 = p4 ^ rotl64 (p1, 1);
      const u64x d1 = p0 ^ rotl64 (p2, 1);
      const u64x d2 = p1 ^ rotl64 (p3, 1);
      const u64x d3 = p2 ^ rotl64 (p4, 1);
      const u64x d4 = p3 ^ rotl64 (p0, 1);

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi: a chain of rotate-and-move steps starting from a01

      u64x carry = a01;
      u64x hold;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi, row by row

      u64x b0;
      u64x b1;
      u64x b2;
      u64x b3;
      u64x b4;

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * last round, reduced: only a subset of the lanes is updated, only a02
     * and a03 go through Chi, and no Iota constant is applied here
     */

    // Theta (partial)

    const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = p4 ^ rotl64 (p1, 1);
    const u64x d1 = p0 ^ rotl64 (p2, 1);
    const u64x d2 = p1 ^ rotl64 (p3, 1);
    const u64x d3 = p2 ^ rotl64 (p4, 1);
    const u64x d4 = p3 ^ rotl64 (p0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
    a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4; a34 ^= d4; a44 ^= d4;

    // Rho Pi (chain stops once a02 has been produced)

    u64x carry = a01;
    u64x hold;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi
    #undef Chi_Row

    // Chi for a02 and a03 only; a02 is updated first so it still sees the
    // pre-Chi value of a03, and a00/a04 are never modified here
    a02 ^= ~a03 & a04;
    a03 ^= ~a04 & a00;

    /**
     * hand the four 32-bit halves of a03/a02 to the single-hash comparison
     */

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_s04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _s08
 * symbol even though this optimized kernel only implements _s04 - confirm.
 */
__kernel void m17900_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_s04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _s16
 * symbol even though this optimized kernel only implements _s04 - confirm.
 */
__kernel void m17900_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m17900_a1-optimized.cl
Normal file
594
OpenCL/m17900_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
// Per-round constants XORed into lane a00 by the Iota step of each kernel
// (indexed by the round counter).
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082,
  0x800000000000808a, 0x8000000080008000,
  0x000000000000808b, 0x0000000080000001,
  0x8000000080008081, 0x8000000000008009,
  0x000000000000008a, 0x0000000000000088,
  0x0000000080008009, 0x000000008000000a,
  0x000000008000808b, 0x800000000000008b,
  0x8000000000008089, 0x8000000000008003,
  0x8000000000008002, 0x8000000000000080,
  0x000000000000800a, 0x800000008000000a,
  0x8000000080008081, 0x8000000000008080,
  0x0000000080000001, 0x8000000080008008
};
|
||||
|
||||
// Number of permutation rounds; a definition supplied earlier (e.g. by the
// build) takes precedence over this default.
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
__kernel void m17900_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * work-item ids; items at or beyond gid_max have nothing to do
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * left word: the first eight 32-bit words of pws[gid] and its length
   */

  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * one iteration per group of VECT_SIZE right-hand words
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * build the combined candidate: load both halves, shift one of them by
     * the other's length, then OR them together
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 4; i++)
    {
      wordl0[i] = pw_buf0[i];
      wordl1[i] = pw_buf1[i];
    }

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    if (combs_mode != COMBINATOR_MODE_BASE_LEFT)
    {
      // the left word is moved behind the right word
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }
    else
    {
      // the right word is moved behind the left word
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }

    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 4; i++)
    {
      w0[i] = wordl0[i] | wordr0[i];
      w1[i] = wordl1[i] | wordr1[i];
      w2[i] = wordl2[i] | wordr2[i];
      w3[i] = wordl3[i] | wordr3[i];
    }

    /**
     * Keccak state: lanes a00..a12 carry the sixteen message words, every
     * other lane starts at zero except a22 which has its top bit set
     * NOTE(review): presumably the closing padding bit of the rate block - confirm
     * NOTE(review): no padding marker is appended in this kernel - presumably
     * the host supplies it with the candidate data; confirm
     */

    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);

    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0; u64x a14 = 0;

    u64x a20 = 0; u64x a21 = 0;
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0; u64x a24 = 0;

    u64x a30 = 0; u64x a31 = 0; u64x a32 = 0; u64x a33 = 0; u64x a34 = 0;

    u64x a40 = 0; u64x a41 = 0; u64x a42 = 0; u64x a43 = 0; u64x a44 = 0;

    // one Rho/Pi step: lane `ad` receives the rotated carried value and its
    // previous content becomes the new carried value
    #define Rho_Pi(ad,r)            \
      hold  = ad;                   \
      ad    = rotl64 (carry, r);    \
      carry = hold;

    // Chi on one row of five lanes, evaluated from a snapshot of the row
    #define Chi_Row(x0,x1,x2,x3,x4)                  \
      b0 = x0; b1 = x1; b2 = x2; b3 = x3; b4 = x4;   \
      x0 ^= ~b1 & b2;                                \
      x1 ^= ~b2 & b3;                                \
      x2 ^= ~b3 & b4;                                \
      x3 ^= ~b4 & b0;                                \
      x4 ^= ~b0 & b1;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta: column parities, then one mixing word per column

      const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      const u64x d0 = p4 ^ rotl64 (p1, 1);
      const u64x d1 = p0 ^ rotl64 (p2, 1);
      const u64x d2 = p1 ^ rotl64 (p3, 1);
      const u64x d3 = p2 ^ rotl64 (p4, 1);
      const u64x d4 = p3 ^ rotl64 (p0, 1);

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi: a chain of rotate-and-move steps starting from a01

      u64x carry = a01;
      u64x hold;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi, row by row

      u64x b0;
      u64x b1;
      u64x b2;
      u64x b3;
      u64x b4;

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    /**
     * last round, reduced: only a subset of the lanes is updated, only a02
     * and a03 go through Chi, and no Iota constant is applied here
     */

    // Theta (partial)

    const u64x p0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    const u64x p1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    const u64x p2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    const u64x p3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    const u64x p4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = p4 ^ rotl64 (p1, 1);
    const u64x d1 = p0 ^ rotl64 (p2, 1);
    const u64x d2 = p1 ^ rotl64 (p3, 1);
    const u64x d3 = p2 ^ rotl64 (p4, 1);
    const u64x d4 = p3 ^ rotl64 (p0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
    a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4; a34 ^= d4; a44 ^= d4;

    // Rho Pi (chain stops once a02 has been produced)

    u64x carry = a01;
    u64x hold;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi
    #undef Chi_Row

    // Chi for a02 and a03 only; a02 is updated first so it still sees the
    // pre-Chi value of a03, and a00/a04 are never modified here
    a02 ^= ~a03 & a04;
    a03 ^= ~a04 & a00;

    /**
     * hand the four 32-bit halves of a03/a02 to the multi-hash comparison
     */

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_m04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _m08
 * symbol even though this optimized kernel only implements _m04 - confirm.
 */
__kernel void m17900_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: it takes the same parameter list as
 * m17900_m04 but performs no work.
 * NOTE(review): presumably a placeholder so the host can resolve the _m16
 * symbol even though this optimized kernel only implements _m04 - confirm.
 */
__kernel void m17900_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Single-digest kernel (ends in COMPARE_S_SIMD) that concatenates a left
 * candidate from pws with a right candidate from combs_buf, runs the
 * Keccak permutation over the combined block and compares two output lanes
 * against the digest at digests_offset.
 *
 * Only the first eight 32-bit words of either half are loaded.
 */
__kernel void m17900_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  // NOTE(review): lid is not referenced by any code visible in this kernel;
  // kept in case one of the externally defined macros relies on it.
  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // left half of the candidate: first 8 words of this work-item's password
  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_l_len = pws[gid].pw_len;

  /**
   * digest
   */

  // presumably consumed by COMPARE_S_SIMD, which is defined outside this file
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);

    // NOTE(review): pw_len is not read by the visible code below
    const u32x pw_len = pw_l_len + pw_r_len;

    /**
     * concat password candidate
     */

    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };

    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];

    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };

    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);

    // shift whichever half comes second by the length of the half that comes first
    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }

    // merge both halves into one 16-word block
    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];

    /**
     * Keccak
     */

    // 25 lanes of state; the first 8 lanes take the 16 message words
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    // top bit of lane a22 set; presumably the closing padding bit of the rate - TODO confirm
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one Rho/Pi step: rotate the carried lane t into ad and carry the old ad onwards
    // (do/while (0) keeps every expansion a single statement)
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round is partial: Theta skips some lanes, Rho Pi stops after a02,
    // Chi is evaluated for a02 and a03 only and Iota is omitted, because only
    // a02 and a03 are read afterwards.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1);                     a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t;                     a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Intentionally empty kernel: the entry point exists with the common kernel
 * signature but performs no work.
 * NOTE(review): presumably m17900_s04 covers every candidate length for this
 * attack mode - confirm against the host-side kernel selection.
 */
__kernel void m17900_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
||||
|
||||
/**
 * Intentionally empty kernel: the entry point exists with the common kernel
 * signature but performs no work.
 * NOTE(review): presumably m17900_s04 covers every candidate length for this
 * attack mode - confirm against the host-side kernel selection.
 */
__kernel void m17900_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
|
695
OpenCL/m17900_a3-optimized.cl
Normal file
695
OpenCL/m17900_a3-optimized.cl
Normal file
@ -0,0 +1,695 @@
|
||||
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants for the Iota step: the kernels in this file XOR
 * keccakf_rndc[round] into lane a00 at the end of every full round.
 * With KECCAK_ROUNDS at its default of 24 the round loops read entries
 * 0..22 only, because the final round is computed without Iota.
 */
__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
  0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
  0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
  0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
  0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
  0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
  0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
  0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
|
||||
|
||||
// Number of permutation rounds; the guard lets an earlier definition of
// KECCAK_ROUNDS take precedence over this default.
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * Multi-digest worker (ends in COMPARE_M_SIMD) shared by the m17900_m04,
 * m17900_m08 and m17900_m16 kernels.
 *
 * w0..w3 hold the 16-word message block prepared by the caller. For every
 * il_pos the value from ix_create_bft (bfs_buf, il_pos) is OR-ed into w0[0],
 * the Keccak permutation is run over the block, and lanes a03 / a02 are
 * handed to the compare macro.
 *
 * pw_len is accepted for signature compatibility and is not read by the
 * visible code.
 *
 * NOTE(review): the function keeps an "m17500" prefix although this file is
 * m17900_a3-optimized.cl - looks like a copy/paste leftover; renaming needs
 * every caller updated in the same change.
 */
DECLSPEC void m17500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // NOTE(review): neither id is referenced by the visible code; presumably
  // the externally defined COMPARE_M_SIMD macro reads them - confirm.
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * loop
   */

  u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    // only the first message word varies between iterations
    const u32x w0lr = w0l | w0r;

    /**
     * Keccak
     */

    // 25 lanes of state; the first 8 lanes take the 16 message words
    u64x a00 = hl32_to_64 (w0[1], w0lr);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    // top bit of lane a22 set; presumably the closing padding bit of the rate - TODO confirm
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one Rho/Pi step: rotate the carried lane t into ad and carry the old ad onwards
    // (do/while (0) keeps every expansion a single statement)
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round is partial: Theta skips some lanes, Rho Pi stops after a02,
    // Chi is evaluated for a02 and a03 only and Iota is omitted, because only
    // a02 and a03 are read afterwards.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1);                     a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t;                     a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Single-digest worker (ends in COMPARE_S_SIMD) called by the m17900_s04
 * kernel in this file.
 *
 * w0..w3 hold the 16-word message block prepared by the caller. For every
 * il_pos the value from ix_create_bft (bfs_buf, il_pos) is OR-ed into w0[0],
 * the Keccak permutation is run over the block, and lanes a03 / a02 are
 * handed to the compare macro together with the digest at digests_offset.
 *
 * pw_len is accepted for signature compatibility and is not read by the
 * visible code.
 *
 * NOTE(review): the function keeps an "m17500" prefix although this file is
 * m17900_a3-optimized.cl - looks like a copy/paste leftover; renaming needs
 * every caller updated in the same change.
 */
DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
{
  /**
   * modifier
   */

  // NOTE(review): neither id is referenced by the visible code; presumably
  // the externally defined COMPARE_S_SIMD macro reads them - confirm.
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  /**
   * digest
   */

  // presumably consumed by COMPARE_S_SIMD, which is defined outside this file
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  u32 w0l = w0[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);

    // only the first message word varies between iterations
    const u32x w0lr = w0l | w0r;

    /**
     * Keccak
     */

    // 25 lanes of state; the first 8 lanes take the 16 message words
    u64x a00 = hl32_to_64 (w0[1], w0lr);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = hl32_to_64 (w2[1], w2[0]);
    u64x a10 = hl32_to_64 (w2[3], w2[2]);
    u64x a11 = hl32_to_64 (w3[1], w3[0]);
    u64x a12 = hl32_to_64 (w3[3], w3[2]);
    u64x a13 = 0;
    u64x a14 = 0;
    u64x a20 = 0;
    u64x a21 = 0;
    // top bit of lane a22 set; presumably the closing padding bit of the rate - TODO confirm
    u64x a22 = 0x8000000000000000;
    u64x a23 = 0;
    u64x a24 = 0;
    u64x a30 = 0;
    u64x a31 = 0;
    u64x a32 = 0;
    u64x a33 = 0;
    u64x a34 = 0;
    u64x a40 = 0;
    u64x a41 = 0;
    u64x a42 = 0;
    u64x a43 = 0;
    u64x a44 = 0;

    // one Rho/Pi step: rotate the carried lane t into ad and carry the old ad onwards
    // (do/while (0) keeps every expansion a single statement)
    #define Rho_Pi(ad,r)      \
      do                      \
      {                       \
        bc0 = ad;             \
        ad = rotl64 (t, r);   \
        t = bc0;              \
      } while (0)

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      u64x t;

      t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
      t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
      t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
      t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
      t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;

      // Rho Pi

      t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
      a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;

      bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
      a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;

      bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
      a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;

      bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
      a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;

      bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
      a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;

      // Iota

      a00 ^= keccakf_rndc[round];
    }

    // Last round is partial: Theta skips some lanes, Rho Pi stops after a02,
    // Chi is evaluated for a02 and a03 only and Iota is omitted, because only
    // a02 and a03 are read afterwards.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    u64x t;

    t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
    t = bc0 ^ rotl64 (bc2, 1);                     a21 ^= t; a31 ^= t; a41 ^= t;
    t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
    t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
    t = bc3 ^ rotl64 (bc0, 1); a04 ^= t;                     a34 ^= t; a44 ^= t;

    // Rho Pi

    t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    #undef Rho_Pi

    // Chi, restricted to the two lanes that are compared

    bc0 = a00;
    bc2 = a02;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
|
||||
|
||||
/**
 * Kernel entry point that builds the message block for one work-item and
 * delegates to m17500m.
 *
 * Only the first four 32-bit words (16 bytes) of the candidate are loaded
 * from pws[gid]; w1, w2 and w3 are zero. Work-items with gid >= gid_max
 * return immediately.
 */
__kernel void m17900_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];

  u32 w1[4];

  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;

  u32 w2[4];

  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;

  u32 w3[4];

  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that builds the message block for one work-item and
 * delegates to m17500m.
 *
 * The first eight 32-bit words (32 bytes) of the candidate are loaded from
 * pws[gid]; w2 and w3 are zero. Work-items with gid >= gid_max return
 * immediately.
 */
__kernel void m17900_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];

  u32 w1[4];

  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];

  u32 w2[4];

  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;

  u32 w3[4];

  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that builds the message block for one work-item and
 * delegates to m17500m.
 *
 * Words 0..13 of the candidate are loaded from pws[gid]; w3[2] and w3[3]
 * are zero (words 14 and 15 are not read). Work-items with gid >= gid_max
 * return immediately.
 */
__kernel void m17900_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];

  u32 w1[4];

  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];

  u32 w2[4];

  w2[0] = pws[gid].i[ 8];
  w2[1] = pws[gid].i[ 9];
  w2[2] = pws[gid].i[10];
  w2[3] = pws[gid].i[11];

  u32 w3[4];

  w3[0] = pws[gid].i[12];
  w3[1] = pws[gid].i[13];
  w3[2] = 0;
  w3[3] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point that builds the message block for one work-item and
 * delegates to m17500s (the single-digest worker).
 *
 * Only the first four 32-bit words (16 bytes) of the candidate are loaded
 * from pws[gid]; w1, w2 and w3 are zero. Work-items with gid >= gid_max
 * return immediately.
 */
__kernel void m17900_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];

  u32 w1[4];

  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;

  u32 w2[4];

  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;

  u32 w3[4];

  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Single-hash entry point that forwards the first eight 32-bit words of the
 * work-item's candidate (w0, w1) to the shared worker; w2 and w3 are passed
 * zeroed.
 *
 * NOTE(review): the worker invoked here is named m17500s although this is an
 * m17900 kernel - presumably it is defined earlier in this file; confirm.
 */
__kernel void m17900_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  // words 0..7 come from the candidate, the upper half starts out as zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Single-hash entry point that forwards the first fourteen 32-bit words of
 * the work-item's candidate (w0, w1, w2 and the first two words of w3) to the
 * shared worker; the last two words of w3 are passed as zero.
 *
 * NOTE(review): the worker invoked here is named m17500s although this is an
 * m17900 kernel - presumably it is defined earlier in this file; confirm.
 */
__kernel void m17900_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  // words 0..13 come from the candidate, the final two words stay zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
479
OpenCL/m18000_a0-optimized.cl
Normal file
479
OpenCL/m18000_a0-optimized.cl
Normal file
@ -0,0 +1,479 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_rp_optimized.h"
|
||||
#include "inc_rp_optimized.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants; the kernels in this file XOR entry [round] into
 * lane a00 at the end of every looped round (the Iota step).
 */

__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082, //  0,  1
  0x800000000000808a, 0x8000000080008000, //  2,  3
  0x000000000000808b, 0x0000000080000001, //  4,  5
  0x8000000080008081, 0x8000000000008009, //  6,  7
  0x000000000000008a, 0x0000000000000088, //  8,  9
  0x0000000080008009, 0x000000008000000a, // 10, 11
  0x000000008000808b, 0x800000000000008b, // 12, 13
  0x8000000000008089, 0x8000000000008003, // 14, 15
  0x8000000000008002, 0x8000000000000080, // 16, 17
  0x000000000000800a, 0x800000008000000a, // 18, 19
  0x8000000080008081, 0x8000000000008080, // 20, 21
  0x0000000080000001, 0x8000000080008008  // 22, 23
};

// number of permutation rounds; may be predefined by the build to override
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
/**
 * Multi-hash kernel: for every rule slot it builds a candidate from the base
 * word via apply_rules_vect, appends a 0x01 byte, runs the permutation rounds
 * below on a 25-lane state and hands four 32-bit result words taken from
 * lanes a03 and a02 to COMPARE_M_SIMD.
 */
__kernel void m18000_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // first eight 32-bit words of this work-item's base candidate
  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * round helpers (both rely on the locals bc0..bc4 and t being in scope)
   */

  // park the lane's old value in bc0, overwrite the lane with the rotated
  // carry t, then make the parked value the next carry
  #define Rho_Pi(ad,r)    \
    bc0 = ad;             \
    ad  = rotl64 (t, r);  \
    t   = bc0;

  // Chi on one row of five lanes, using snapshots of the row taken first
  #define Chi_Row(x0,x1,x2,x3,x4)                       \
    bc0 = x0; bc1 = x1; bc2 = x2; bc3 = x3; bc4 = x4;   \
    x0 ^= ~bc1 & bc2;                                   \
    x1 ^= ~bc2 & bc3;                                   \
    x2 ^= ~bc3 & bc4;                                   \
    x3 ^= ~bc4 & bc0;                                   \
    x4 ^= ~bc0 & bc1;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // lanes a00..a03 carry the candidate words, every other lane is constant
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0, a11 = 0, a12 = 0;
    u64x a13 = 0x8000000000000000; // presumably the closing padding bit of the rate block - confirm
    u64x a14 = 0;

    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int rnd = 0; rnd < KECCAK_ROUNDS - 1; rnd++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      // the column parities are not modified while they are applied, so all
      // five column deltas can be derived up front
      const u64x d0 = bc4 ^ rotl64 (bc1, 1);
      const u64x d1 = bc0 ^ rotl64 (bc2, 1);
      const u64x d2 = bc1 ^ rotl64 (bc3, 1);
      const u64x d3 = bc2 ^ rotl64 (bc4, 1);
      const u64x d4 = bc3 ^ rotl64 (bc0, 1);

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi

      u64x t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[rnd];
    }

    // The last round is written out by hand and touches only a subset of the
    // lanes; after it, only a02 and a03 are read.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = bc4 ^ rotl64 (bc1, 1);
    const u64x d1 = bc0 ^ rotl64 (bc2, 1);
    const u64x d2 = bc1 ^ rotl64 (bc3, 1);
    const u64x d3 = bc2 ^ rotl64 (bc4, 1);
    const u64x d4 = bc3 ^ rotl64 (bc0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
                          a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4;                       a34 ^= d4; a44 ^= d4;

    // Rho Pi

    u64x t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    // Chi, restricted to the two lanes that are compared below

    bc0 = a00;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }

  #undef Chi_Row
  #undef Rho_Pi
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: launching it performs no work.
 * In this file only the _m04 multi-hash variant contains an implementation.
 * NOTE(review): presumably kept so the _m08 entry point still exists for the
 * host side - confirm.
 */
__kernel void m18000_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: launching it performs no work.
 * In this file only the _m04 multi-hash variant contains an implementation.
 * NOTE(review): presumably kept so the _m16 entry point still exists for the
 * host side - confirm.
 */
__kernel void m18000_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Single-hash kernel: for every rule slot it builds a candidate from the base
 * word via apply_rules_vect, appends a 0x01 byte, runs the permutation rounds
 * below on a 25-lane state and hands four 32-bit result words taken from
 * lanes a03 and a02 to COMPARE_S_SIMD.
 */
__kernel void m18000_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // first eight 32-bit words of this work-item's base candidate
  u32 pw_buf0[4] = { pws[gid].i[0], pws[gid].i[1], pws[gid].i[2], pws[gid].i[3] };
  u32 pw_buf1[4] = { pws[gid].i[4], pws[gid].i[5], pws[gid].i[6], pws[gid].i[7] };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * digest
   */

  // the four target words of the digest at digests_offset; not referenced by
  // name below - presumably picked up inside COMPARE_S_SIMD, confirm
  const u32 search[4] =
  {
    digests_buf[digests_offset].digest_buf[DGST_R0],
    digests_buf[digests_offset].digest_buf[DGST_R1],
    digests_buf[digests_offset].digest_buf[DGST_R2],
    digests_buf[digests_offset].digest_buf[DGST_R3]
  };

  /**
   * round helpers (both rely on the locals bc0..bc4 and t being in scope)
   */

  // park the lane's old value in bc0, overwrite the lane with the rotated
  // carry t, then make the parked value the next carry
  #define Rho_Pi(ad,r)    \
    bc0 = ad;             \
    ad  = rotl64 (t, r);  \
    t   = bc0;

  // Chi on one row of five lanes, using snapshots of the row taken first
  #define Chi_Row(x0,x1,x2,x3,x4)                       \
    bc0 = x0; bc1 = x1; bc2 = x2; bc3 = x3; bc4 = x4;   \
    x0 ^= ~bc1 & bc2;                                   \
    x1 ^= ~bc2 & bc3;                                   \
    x2 ^= ~bc3 & bc4;                                   \
    x3 ^= ~bc4 & bc0;                                   \
    x4 ^= ~bc0 & bc1;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    append_0x01_2x4_VV (w0, w1, out_len);

    /**
     * Keccak
     */

    // lanes a00..a03 carry the candidate words, every other lane is constant
    u64x a00 = hl32_to_64 (w0[1], w0[0]);
    u64x a01 = hl32_to_64 (w0[3], w0[2]);
    u64x a02 = hl32_to_64 (w1[1], w1[0]);
    u64x a03 = hl32_to_64 (w1[3], w1[2]);
    u64x a04 = 0;

    u64x a10 = 0, a11 = 0, a12 = 0;
    u64x a13 = 0x8000000000000000; // presumably the closing padding bit of the rate block - confirm
    u64x a14 = 0;

    u64x a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
    u64x a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
    u64x a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int rnd = 0; rnd < KECCAK_ROUNDS - 1; rnd++)
    {
      // Theta

      u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
      u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
      u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
      u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
      u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

      // the column parities are not modified while they are applied, so all
      // five column deltas can be derived up front
      const u64x d0 = bc4 ^ rotl64 (bc1, 1);
      const u64x d1 = bc0 ^ rotl64 (bc2, 1);
      const u64x d2 = bc1 ^ rotl64 (bc3, 1);
      const u64x d3 = bc2 ^ rotl64 (bc4, 1);
      const u64x d4 = bc3 ^ rotl64 (bc0, 1);

      a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0; a40 ^= d0;
      a01 ^= d1; a11 ^= d1; a21 ^= d1; a31 ^= d1; a41 ^= d1;
      a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2; a42 ^= d2;
      a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
      a04 ^= d4; a14 ^= d4; a24 ^= d4; a34 ^= d4; a44 ^= d4;

      // Rho Pi

      u64x t = a01;

      Rho_Pi (a20,  1);
      Rho_Pi (a12,  3);
      Rho_Pi (a21,  6);
      Rho_Pi (a32, 10);
      Rho_Pi (a33, 15);
      Rho_Pi (a03, 21);
      Rho_Pi (a10, 28);
      Rho_Pi (a31, 36);
      Rho_Pi (a13, 45);
      Rho_Pi (a41, 55);
      Rho_Pi (a44,  2);
      Rho_Pi (a04, 14);
      Rho_Pi (a30, 27);
      Rho_Pi (a43, 41);
      Rho_Pi (a34, 56);
      Rho_Pi (a23,  8);
      Rho_Pi (a22, 25);
      Rho_Pi (a02, 43);
      Rho_Pi (a40, 62);
      Rho_Pi (a24, 18);
      Rho_Pi (a42, 39);
      Rho_Pi (a14, 61);
      Rho_Pi (a11, 20);
      Rho_Pi (a01, 44);

      // Chi

      Chi_Row (a00, a01, a02, a03, a04);
      Chi_Row (a10, a11, a12, a13, a14);
      Chi_Row (a20, a21, a22, a23, a24);
      Chi_Row (a30, a31, a32, a33, a34);
      Chi_Row (a40, a41, a42, a43, a44);

      // Iota

      a00 ^= keccakf_rndc[rnd];
    }

    // The last round is written out by hand and touches only a subset of the
    // lanes; after it, only a02 and a03 are read.

    // Theta

    u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
    u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
    u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
    u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
    u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;

    const u64x d0 = bc4 ^ rotl64 (bc1, 1);
    const u64x d1 = bc0 ^ rotl64 (bc2, 1);
    const u64x d2 = bc1 ^ rotl64 (bc3, 1);
    const u64x d3 = bc2 ^ rotl64 (bc4, 1);
    const u64x d4 = bc3 ^ rotl64 (bc0, 1);

    a00 ^= d0; a10 ^= d0; a20 ^= d0; a30 ^= d0;
                          a21 ^= d1; a31 ^= d1; a41 ^= d1;
    a02 ^= d2; a12 ^= d2; a22 ^= d2; a32 ^= d2;
    a03 ^= d3; a13 ^= d3; a23 ^= d3; a33 ^= d3; a43 ^= d3;
    a04 ^= d4;                       a34 ^= d4; a44 ^= d4;

    // Rho Pi

    u64x t = a01;

    Rho_Pi (a20,  1);
    Rho_Pi (a12,  3);
    Rho_Pi (a21,  6);
    Rho_Pi (a32, 10);
    Rho_Pi (a33, 15);
    Rho_Pi (a03, 21);
    Rho_Pi (a10, 28);
    Rho_Pi (a31, 36);
    Rho_Pi (a13, 45);
    Rho_Pi (a41, 55);
    Rho_Pi (a44,  2);
    Rho_Pi (a04, 14);
    Rho_Pi (a30, 27);
    Rho_Pi (a43, 41);
    Rho_Pi (a34, 56);
    Rho_Pi (a23,  8);
    Rho_Pi (a22, 25);
    Rho_Pi (a02, 43);

    // Chi, restricted to the two lanes that are compared below

    bc0 = a00;
    bc3 = a03;
    bc4 = a04;

    a02 ^= ~bc3 & bc4;
    a03 ^= ~bc4 & bc0;

    const u32x r0 = l32_from_64 (a03);
    const u32x r1 = h32_from_64 (a03);
    const u32x r2 = l32_from_64 (a02);
    const u32x r3 = h32_from_64 (a02);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }

  #undef Chi_Row
  #undef Rho_Pi
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: launching it performs no work.
 * In this file only the _s04 single-hash variant contains an implementation.
 * NOTE(review): presumably kept so the _s08 entry point still exists for the
 * host side - confirm.
 */
__kernel void m18000_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Kernel entry point with an empty body: launching it performs no work.
 * In this file only the _s04 single-hash variant contains an implementation.
 * NOTE(review): presumably kept so the _s16 entry point still exists for the
 * host side - confirm.
 */
__kernel void m18000_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
594
OpenCL/m18000_a1-optimized.cl
Normal file
594
OpenCL/m18000_a1-optimized.cl
Normal file
@ -0,0 +1,594 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
/**
 * Per-round constants; the kernel below XORs entry [round] into lane a00
 * at the end of every looped round (the Iota step).
 */

__constant u64a keccakf_rndc[24] =
{
  0x0000000000000001, 0x0000000000008082, //  0,  1
  0x800000000000808a, 0x8000000080008000, //  2,  3
  0x000000000000808b, 0x0000000080000001, //  4,  5
  0x8000000080008081, 0x8000000000008009, //  6,  7
  0x000000000000008a, 0x0000000000000088, //  8,  9
  0x0000000080008009, 0x000000008000000a, // 10, 11
  0x000000008000808b, 0x800000000000008b, // 12, 13
  0x8000000000008089, 0x8000000000008003, // 14, 15
  0x8000000000008002, 0x8000000000000080, // 16, 17
  0x000000000000800a, 0x800000008000000a, // 18, 19
  0x8000000080008081, 0x8000000000008080, // 20, 21
  0x0000000080000001, 0x8000000080008008  // 22, 23
};

// number of permutation rounds; may be predefined by the build to override
#if !defined (KECCAK_ROUNDS)
#define KECCAK_ROUNDS 24
#endif
|
||||
|
||||
__kernel void m18000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Kernel entry point with an empty body: it accepts the common kernel
// argument list but performs no work when launched.
// NOTE(review): presumably kept only so that every expected entry point
// exists for the host side - confirm against the host-side kernel loader.
__kernel void m18000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
// Kernel entry point with an empty body: it accepts the common kernel
// argument list but performs no work when launched.
// NOTE(review): presumably kept only so that every expected entry point
// exists for the host side - confirm against the host-side kernel loader.
__kernel void m18000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
// Combinator-path kernel that checks candidates against one target digest.
// Each work item loads its left-hand word from pws[gid], merges it with every
// right-hand candidate taken from combs_buf, runs the Keccak permutation over
// the resulting 64-byte block and passes four 32-bit words of the final state
// (both halves of lanes a03 and a02) to COMPARE_S_SIMD.
// NOTE(review): no padding byte is appended inside this kernel; presumably the
// host already stored it in the candidate buffers - confirm.
__kernel void m18000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
// lid is not referenced directly in this body.
// NOTE(review): it may be consumed by the COMPARE_S_SIMD macro - confirm before removing.
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
// Work items past the end of the candidate range do nothing.
if (gid >= gid_max) return;
|
||||
|
||||
// Only the first eight 32-bit words of the left-hand word are loaded.
u32 pw_buf0[4];
|
||||
u32 pw_buf1[4];
|
||||
|
||||
pw_buf0[0] = pws[gid].i[0];
|
||||
pw_buf0[1] = pws[gid].i[1];
|
||||
pw_buf0[2] = pws[gid].i[2];
|
||||
pw_buf0[3] = pws[gid].i[3];
|
||||
pw_buf1[0] = pws[gid].i[4];
|
||||
pw_buf1[1] = pws[gid].i[5];
|
||||
pw_buf1[2] = pws[gid].i[6];
|
||||
pw_buf1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
// Four words of the single target digest at digests_offset. Not referenced
// directly below; presumably read by COMPARE_S_SIMD - confirm.
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
// One iteration handles VECT_SIZE right-hand candidates at once.
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
// NOTE(review): pw_len is computed but not read again in this body.
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
/**
|
||||
* concat password candidate
|
||||
*/
|
||||
|
||||
u32x wordl0[4] = { 0 };
|
||||
u32x wordl1[4] = { 0 };
|
||||
u32x wordl2[4] = { 0 };
|
||||
u32x wordl3[4] = { 0 };
|
||||
|
||||
wordl0[0] = pw_buf0[0];
|
||||
wordl0[1] = pw_buf0[1];
|
||||
wordl0[2] = pw_buf0[2];
|
||||
wordl0[3] = pw_buf0[3];
|
||||
wordl1[0] = pw_buf1[0];
|
||||
wordl1[1] = pw_buf1[1];
|
||||
wordl1[2] = pw_buf1[2];
|
||||
wordl1[3] = pw_buf1[3];
|
||||
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
// The half that comes second is passed to the offset helper together with
// the length of the other half (presumably shifting it behind the first
// half - confirm against switch_buffer_by_offset_le_VV); the two buffers
// are then OR-merged below.
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
|
||||
}
|
||||
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// The sixteen message words fill the first eight 64-bit lanes (a00..a12),
// two words per lane. Lane a13 starts with only its top bit set and every
// other lane starts at zero.
// NOTE(review): the a13 constant looks like the closing padding bit of a
// 72-byte rate block - confirm.
u64x a00 = hl32_to_64 (w0[1], w0[0]);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): stores rotl64 (t, r) into lane ad and leaves the lane's
// previous value in t, using bc0 as scratch. Successive calls are chained
// through t, so their order must not be changed.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// All rounds except the last run in full here; the last one follows the loop.
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round in reduced form: several Theta updates and the tail of the
// Rho Pi chain are left out, Chi is applied to a02 and a03 only and no round
// constant is XORed in. Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// r0/r1 come from lane a03 and r2/r3 from lane a02 (low/high halves, going
// by the helper names).
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel entry point with an empty body: it accepts the common kernel
// argument list but performs no work when launched.
// NOTE(review): presumably kept only so that every expected entry point
// exists for the host side - confirm against the host-side kernel loader.
__kernel void m18000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
||||
|
||||
// Kernel entry point with an empty body: it accepts the common kernel
// argument list but performs no work when launched.
// NOTE(review): presumably kept only so that every expected entry point
// exists for the host side - confirm against the host-side kernel loader.
__kernel void m18000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||
{
|
||||
}
|
695
OpenCL/m18000_a3-optimized.cl
Normal file
695
OpenCL/m18000_a3-optimized.cl
Normal file
@ -0,0 +1,695 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "inc_vendor.cl"
|
||||
#include "inc_hash_constants.h"
|
||||
#include "inc_hash_functions.cl"
|
||||
#include "inc_types.cl"
|
||||
#include "inc_common.cl"
|
||||
#include "inc_simd.cl"
|
||||
|
||||
// Per-round constants. Entry [round] is XORed into lane a00 at the end of each
// looped round (the Iota step) in the functions below.
__constant u64a keccakf_rndc[24] =
|
||||
{
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
// Round count; a definition made before this point takes precedence. The
// functions below loop over KECCAK_ROUNDS - 1 rounds and finish the last round
// inline without an Iota step, so with the default of 24 they never read the
// final table entry.
#ifndef KECCAK_ROUNDS
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
// Shared worker for the m18000_m* kernels of this file. For every group of
// VECT_SIZE candidates it ORs the per-candidate word from bfs_buf into w0[0],
// runs the Keccak permutation over the 64-byte block w0..w3 and passes four
// 32-bit words of the final state (both halves of lanes a03 and a02) to
// COMPARE_M_SIMD. w1..w3 and w0[1..3] are used unchanged for every candidate.
// NOTE(review): the function is named m17600m although it lives in
// m18000_a3-optimized.cl and is called from m18000_m04 - looks like a
// copy/paste leftover; renaming requires updating every call site.
// NOTE(review): pw_len is not read in this body, and no padding byte is
// appended here; presumably the host already stored it in the input - confirm.
DECLSPEC void m17600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
// gid and lid are not referenced directly in this body.
// NOTE(review): they may be consumed by the COMPARE_M_SIMD macro - confirm before removing.
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
// Candidate-independent part of the first message word.
u32 w0l = w0[0];
|
||||
|
||||
// One iteration handles VECT_SIZE candidates at once.
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// The sixteen message words fill the first eight 64-bit lanes (a00..a12),
// two words per lane, with w0lr standing in for w0[0]. Lane a13 starts with
// only its top bit set and every other lane starts at zero.
// NOTE(review): the a13 constant looks like the closing padding bit of a
// 72-byte rate block - confirm.
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): stores rotl64 (t, r) into lane ad and leaves the lane's
// previous value in t, using bc0 as scratch. Successive calls are chained
// through t, so their order must not be changed.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// All rounds except the last run in full here; the last one follows the loop.
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round in reduced form: several Theta updates and the tail of the
// Rho Pi chain are left out, Chi is applied to a02 and a03 only and no round
// constant is XORed in. Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// r0/r1 come from lane a03 and r2/r3 from lane a02 (low/high halves, going
// by the helper names).
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_M_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
// Worker that checks candidates against one target digest. For every group of
// VECT_SIZE candidates it ORs the per-candidate word from bfs_buf into w0[0],
// runs the Keccak permutation over the 64-byte block w0..w3 and passes four
// 32-bit words of the final state (both halves of lanes a03 and a02) to
// COMPARE_S_SIMD. w1..w3 and w0[1..3] are used unchanged for every candidate.
// NOTE(review): the function is named m17600s although it lives in
// m18000_a3-optimized.cl - looks like a copy/paste leftover; renaming requires
// updating every call site.
// NOTE(review): pw_len is not read in this body, and no padding byte is
// appended here; presumably the host already stored it in the input - confirm.
DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
*/
|
||||
|
||||
// gid and lid are not referenced directly in this body.
// NOTE(review): they may be consumed by the COMPARE_S_SIMD macro - confirm before removing.
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
|
||||
// Four words of the single target digest at digests_offset. Not referenced
// directly below; presumably read by COMPARE_S_SIMD - confirm.
const u32 search[4] =
|
||||
{
|
||||
digests_buf[digests_offset].digest_buf[DGST_R0],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R1],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R2],
|
||||
digests_buf[digests_offset].digest_buf[DGST_R3]
|
||||
};
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
// Candidate-independent part of the first message word.
u32 w0l = w0[0];
|
||||
|
||||
// One iteration handles VECT_SIZE candidates at once.
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32x w0r = ix_create_bft (bfs_buf, il_pos);
|
||||
|
||||
const u32x w0lr = w0l | w0r;
|
||||
|
||||
/**
|
||||
* Keccak
|
||||
*/
|
||||
|
||||
// The sixteen message words fill the first eight 64-bit lanes (a00..a12),
// two words per lane, with w0lr standing in for w0[0]. Lane a13 starts with
// only its top bit set and every other lane starts at zero.
// NOTE(review): the a13 constant looks like the closing padding bit of a
// 72-byte rate block - confirm.
u64x a00 = hl32_to_64 (w0[1], w0lr);
|
||||
u64x a01 = hl32_to_64 (w0[3], w0[2]);
|
||||
u64x a02 = hl32_to_64 (w1[1], w1[0]);
|
||||
u64x a03 = hl32_to_64 (w1[3], w1[2]);
|
||||
u64x a04 = hl32_to_64 (w2[1], w2[0]);
|
||||
u64x a10 = hl32_to_64 (w2[3], w2[2]);
|
||||
u64x a11 = hl32_to_64 (w3[1], w3[0]);
|
||||
u64x a12 = hl32_to_64 (w3[3], w3[2]);
|
||||
u64x a13 = 0x8000000000000000;
|
||||
u64x a14 = 0;
|
||||
u64x a20 = 0;
|
||||
u64x a21 = 0;
|
||||
u64x a22 = 0;
|
||||
u64x a23 = 0;
|
||||
u64x a24 = 0;
|
||||
u64x a30 = 0;
|
||||
u64x a31 = 0;
|
||||
u64x a32 = 0;
|
||||
u64x a33 = 0;
|
||||
u64x a34 = 0;
|
||||
u64x a40 = 0;
|
||||
u64x a41 = 0;
|
||||
u64x a42 = 0;
|
||||
u64x a43 = 0;
|
||||
u64x a44 = 0;
|
||||
|
||||
// Rho_Pi (ad, r): stores rotl64 (t, r) into lane ad and leaves the lane's
// previous value in t, using bc0 as scratch. Successive calls are chained
// through t, so their order must not be changed.
#define Rho_Pi(ad,r) \
|
||||
bc0 = ad; \
|
||||
ad = rotl64 (t, r); \
|
||||
t = bc0; \
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
// All rounds except the last run in full here; the last one follows the loop.
for (int round = 0; round < KECCAK_ROUNDS - 1; round++)
|
||||
{
|
||||
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t; a40 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a01 ^= t; a11 ^= t; a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t; a42 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a14 ^= t; a24 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
Rho_Pi (a40, 62);
|
||||
Rho_Pi (a24, 18);
|
||||
Rho_Pi (a42, 39);
|
||||
Rho_Pi (a14, 61);
|
||||
Rho_Pi (a11, 20);
|
||||
Rho_Pi (a01, 44);
|
||||
|
||||
// Chi
|
||||
|
||||
bc0 = a00; bc1 = a01; bc2 = a02; bc3 = a03; bc4 = a04;
|
||||
a00 ^= ~bc1 & bc2; a01 ^= ~bc2 & bc3; a02 ^= ~bc3 & bc4; a03 ^= ~bc4 & bc0; a04 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a10; bc1 = a11; bc2 = a12; bc3 = a13; bc4 = a14;
|
||||
a10 ^= ~bc1 & bc2; a11 ^= ~bc2 & bc3; a12 ^= ~bc3 & bc4; a13 ^= ~bc4 & bc0; a14 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a20; bc1 = a21; bc2 = a22; bc3 = a23; bc4 = a24;
|
||||
a20 ^= ~bc1 & bc2; a21 ^= ~bc2 & bc3; a22 ^= ~bc3 & bc4; a23 ^= ~bc4 & bc0; a24 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a30; bc1 = a31; bc2 = a32; bc3 = a33; bc4 = a34;
|
||||
a30 ^= ~bc1 & bc2; a31 ^= ~bc2 & bc3; a32 ^= ~bc3 & bc4; a33 ^= ~bc4 & bc0; a34 ^= ~bc0 & bc1;
|
||||
|
||||
bc0 = a40; bc1 = a41; bc2 = a42; bc3 = a43; bc4 = a44;
|
||||
a40 ^= ~bc1 & bc2; a41 ^= ~bc2 & bc3; a42 ^= ~bc3 & bc4; a43 ^= ~bc4 & bc0; a44 ^= ~bc0 & bc1;
|
||||
|
||||
// Iota
|
||||
|
||||
a00 ^= keccakf_rndc[round];
|
||||
}
|
||||
|
||||
// Last round in reduced form: several Theta updates and the tail of the
// Rho Pi chain are left out, Chi is applied to a02 and a03 only and no round
// constant is XORed in. Only a02 and a03 are read afterwards.
// Theta
|
||||
|
||||
u64x bc0 = a00 ^ a10 ^ a20 ^ a30 ^ a40;
|
||||
u64x bc1 = a01 ^ a11 ^ a21 ^ a31 ^ a41;
|
||||
u64x bc2 = a02 ^ a12 ^ a22 ^ a32 ^ a42;
|
||||
u64x bc3 = a03 ^ a13 ^ a23 ^ a33 ^ a43;
|
||||
u64x bc4 = a04 ^ a14 ^ a24 ^ a34 ^ a44;
|
||||
|
||||
u64x t;
|
||||
|
||||
t = bc4 ^ rotl64 (bc1, 1); a00 ^= t; a10 ^= t; a20 ^= t; a30 ^= t;
|
||||
t = bc0 ^ rotl64 (bc2, 1); a21 ^= t; a31 ^= t; a41 ^= t;
|
||||
t = bc1 ^ rotl64 (bc3, 1); a02 ^= t; a12 ^= t; a22 ^= t; a32 ^= t;
|
||||
t = bc2 ^ rotl64 (bc4, 1); a03 ^= t; a13 ^= t; a23 ^= t; a33 ^= t; a43 ^= t;
|
||||
t = bc3 ^ rotl64 (bc0, 1); a04 ^= t; a34 ^= t; a44 ^= t;
|
||||
|
||||
// Rho Pi
|
||||
|
||||
t = a01;
|
||||
|
||||
Rho_Pi (a20, 1);
|
||||
Rho_Pi (a12, 3);
|
||||
Rho_Pi (a21, 6);
|
||||
Rho_Pi (a32, 10);
|
||||
Rho_Pi (a33, 15);
|
||||
Rho_Pi (a03, 21);
|
||||
Rho_Pi (a10, 28);
|
||||
Rho_Pi (a31, 36);
|
||||
Rho_Pi (a13, 45);
|
||||
Rho_Pi (a41, 55);
|
||||
Rho_Pi (a44, 2);
|
||||
Rho_Pi (a04, 14);
|
||||
Rho_Pi (a30, 27);
|
||||
Rho_Pi (a43, 41);
|
||||
Rho_Pi (a34, 56);
|
||||
Rho_Pi (a23, 8);
|
||||
Rho_Pi (a22, 25);
|
||||
Rho_Pi (a02, 43);
|
||||
|
||||
#undef Rho_Pi
|
||||
|
||||
bc0 = a00;
|
||||
bc2 = a02;
|
||||
bc3 = a03;
|
||||
bc4 = a04;
|
||||
|
||||
a02 ^= ~bc3 & bc4;
|
||||
a03 ^= ~bc4 & bc0;
|
||||
|
||||
// r0/r1 come from lane a03 and r2/r3 from lane a02 (low/high halves, going
// by the helper names).
const u32x r0 = l32_from_64 (a03);
|
||||
const u32x r1 = h32_from_64 (a03);
|
||||
const u32x r2 = l32_from_64 (a02);
|
||||
const u32x r3 = h32_from_64 (a02);
|
||||
|
||||
COMPARE_S_SIMD (r0, r1, r2, r3);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Entry point that prepares the 64-byte message block for one work item and
 * hands it to m17600m.
 *
 * Only the first four 32-bit words of the candidate stored at pws[gid] are
 * copied; the remaining twelve words of the block are zero. Work items whose
 * global id is not below gid_max return without doing anything.
 */
__kernel void m18000_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // first quarter of the block comes from the candidate buffer
  u32 w0[4] =
  {
    pws[gid].i[0],
    pws[gid].i[1],
    pws[gid].i[2],
    pws[gid].i[3]
  };

  // the other three quarters are all-zero
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  const u32 pw_len = pws[gid].pw_len;

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m18000_m08.
 *
 * Copies words 0..7 of this work-item's pws[] entry into w0/w1, keeps w2/w3
 * zeroed, and hands everything to m17600m ().
 *
 * NOTE(review): the callee is named m17600m although this kernel is m18000_*;
 * its definition is outside this view - confirm it is the intended helper.
 */
__kernel void m18000_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // the first eight words are taken from the input buffer
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m18000_m16.
 *
 * Copies words 0..13 of this work-item's pws[] entry into w0..w3 (the last
 * two words of w3 stay zero) and hands everything to m17600m ().
 *
 * NOTE(review): the callee is named m17600m although this kernel is m18000_*;
 * its definition is outside this view - confirm it is the intended helper.
 */
__kernel void m18000_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // fourteen words are taken from the input buffer, w3[2] and w3[3] stay zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  /**
   * main
   */

  m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m18000_s04.
 *
 * Copies words 0..3 of this work-item's pws[] entry into w0, keeps w1..w3
 * zeroed, and hands everything to m17600s ().
 *
 * NOTE(review): the callee is named m17600s although this kernel is m18000_*;
 * its definition is outside this view - confirm it is the intended helper.
 */
__kernel void m18000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // only the first four words are taken from the input buffer
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { 0, 0, 0, 0 };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m18000_s08.
 *
 * Copies words 0..7 of this work-item's pws[] entry into w0/w1, keeps w2/w3
 * zeroed, and hands everything to m17600s ().
 *
 * NOTE(review): the callee is named m17600s although this kernel is m18000_*;
 * its definition is outside this view - confirm it is the intended helper.
 */
__kernel void m18000_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // the first eight words are taken from the input buffer
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { 0, 0, 0, 0 };
  u32 w3[4] = { 0, 0, 0, 0 };

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
||||
|
||||
/**
 * Kernel entry point m18000_s16.
 *
 * Copies words 0..13 of this work-item's pws[] entry into w0..w3 (the last
 * two words of w3 stay zero) and hands everything to m17600s ().
 *
 * NOTE(review): the callee is named m17600s although this kernel is m18000_*;
 * its definition is outside this view - confirm it is the intended helper.
 */
__kernel void m18000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  // work-items at or beyond gid_max have nothing to do
  if (gid >= gid_max) return;

  const u32 pw_len = pws[gid].pw_len;

  // fourteen words are taken from the input buffer, w3[2] and w3[3] stay zero
  u32 w0[4] = { pws[gid].i[ 0], pws[gid].i[ 1], pws[gid].i[ 2], pws[gid].i[ 3] };
  u32 w1[4] = { pws[gid].i[ 4], pws[gid].i[ 5], pws[gid].i[ 6], pws[gid].i[ 7] };
  u32 w2[4] = { pws[gid].i[ 8], pws[gid].i[ 9], pws[gid].i[10], pws[gid].i[11] };
  u32 w3[4] = { pws[gid].i[12], pws[gid].i[13], 0, 0 };

  /**
   * main
   */

  m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
|
1
deps/OpenCL-Headers/CL
vendored
1
deps/OpenCL-Headers/CL
vendored
@ -1 +0,0 @@
|
||||
Subproject commit bf0f43b76f4556c3d5717f8ba8a01216b27f4af7
|
1
deps/git/OpenCL-Headers
vendored
Submodule
1
deps/git/OpenCL-Headers
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit d5169245693563d4c69434ba061d92d3d68c4123
|
@ -6,12 +6,26 @@
|
||||
|
||||
- Add new option --slow-candidates which allows hashcat to generate passwords on-host
|
||||
|
||||
##
|
||||
## Algorithms
|
||||
##
|
||||
|
||||
- Added hash-mode 17300 = SHA3-224
|
||||
- Added hash-mode 17400 = SHA3-256
|
||||
- Added hash-mode 17500 = SHA3-384
|
||||
- Added hash-mode 17600 = SHA3-512
|
||||
- Added hash-mode 17700 = Keccak-224
|
||||
- Added hash-mode 17800 = Keccak-256
|
||||
- Added hash-mode 17900 = Keccak-384
|
||||
- Added hash-mode 18000 = Keccak-512
|
||||
- Removed hash-mode 5000 = SHA-3 (Keccak)
|
||||
|
||||
##
|
||||
## Improvements
|
||||
##
|
||||
|
||||
- Workaround some AMD OpenCL runtime segmentation faults
|
||||
- Allow bitcoin master key length not be exactly 96 byte a multiple of 16
|
||||
- Allow bitcoin master key lengths different from 96 bytes, but they must always be a multiple of 16
|
||||
- Getting rid of OPTS_TYPE_HASH_COPY for Ansible Vault
|
||||
- Add a tracker for salts, amplifier and iterations to status screen
|
||||
- Add option --markov-hcstat2 to make it clear that the new hcstat2 format (compressed hcstat2gen output) must be used
|
||||
@ -21,6 +35,7 @@
|
||||
- Added additional hybrid "passthrough" rules, to enable variable-length append/prepend attacks
|
||||
- Increased the maximum size of edata2 in Kerberos 5 TGS-REP etype 23
|
||||
- Allow hashfile for -m 16800 to be used with -m 16801
|
||||
- Make the masks parser more restrictive by rejecting a single '?' at the end of the mask (use ?? instead)
|
||||
|
||||
##
|
||||
## Bugs
|
||||
|
@ -41,11 +41,18 @@ NVIDIA GPUs require "NVIDIA Driver" (367.x or later)
|
||||
- MD5
|
||||
- Half MD5
|
||||
- SHA1
|
||||
- SHA-224
|
||||
- SHA-256
|
||||
- SHA-384
|
||||
- SHA-512
|
||||
- SHA-3 (Keccak)
|
||||
- SHA2-224
|
||||
- SHA2-256
|
||||
- SHA2-384
|
||||
- SHA2-512
|
||||
- SHA3-224
|
||||
- SHA3-256
|
||||
- SHA3-384
|
||||
- SHA3-512
|
||||
- Keccak-224
|
||||
- Keccak-256
|
||||
- Keccak-384
|
||||
- Keccak-512
|
||||
- BLAKE2b-512
|
||||
- SipHash
|
||||
- RIPEMD-160
|
||||
|
@ -176,7 +176,7 @@ _hashcat ()
|
||||
{
|
||||
local VERSION=4.2.1
|
||||
|
||||
local HASH_MODES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 124 130 131 132 133 140 141 150 160 200 300 400 500 501 600 900 1000 1100 1400 1410 1411 1420 1421 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2501 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8800 8900 9000 9100 9200 9300 9400 9500 9600 9700 9710 9720 9800 9810 9820 9900 10000 10100 10200 10300 10400 10410 10420 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11700 11800 11900 12000 12001 12100 12200 12300 12400 12500 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16801 16900"
|
||||
local HASH_MODES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 124 130 131 132 133 140 141 150 160 200 300 400 500 501 600 900 1000 1100 1400 1410 1411 1420 1421 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2501 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5100 5200 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7800 7900 8000 8100 8200 8300 8400 8500 8600 8700 8800 8900 9000 9100 9200 9300 9400 9500 9600 9700 9710 9720 9800 9810 9820 9900 10000 10100 10200 10300 10400 10410 10420 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11700 11800 11900 12000 12001 12100 12200 12300 12400 12500 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16801 16900 17300 17400 17500 17600 17700 17800 17900 18000"
|
||||
local ATTACK_MODES="0 1 3 6 7"
|
||||
local HCCAPX_MESSAGE_PAIRS="0 1 2 3 4 5"
|
||||
local OUTFILE_FORMATS="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"
|
||||
|
@ -6,6 +6,8 @@
|
||||
#ifndef _EXT_OPENCL_H
|
||||
#define _EXT_OPENCL_H
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
|
||||
|
@ -1060,7 +1060,6 @@ typedef enum hash_type
|
||||
HASH_TYPE_ORACLEH = 13,
|
||||
HASH_TYPE_DESRACF = 14,
|
||||
HASH_TYPE_BCRYPT = 15,
|
||||
HASH_TYPE_KECCAK = 16,
|
||||
HASH_TYPE_NETNTLM = 17,
|
||||
HASH_TYPE_RIPEMD160 = 18,
|
||||
HASH_TYPE_WHIRLPOOL = 19,
|
||||
@ -1186,7 +1185,6 @@ typedef enum kern_type
|
||||
KERN_TYPE_SHA1_MD5 = 4700,
|
||||
KERN_TYPE_MD5_CHAP = 4800,
|
||||
KERN_TYPE_SHA1_SLT_PW_SLT = 4900,
|
||||
KERN_TYPE_KECCAK = 5000,
|
||||
KERN_TYPE_MD5H = 5100,
|
||||
KERN_TYPE_PSAFE3 = 5200,
|
||||
KERN_TYPE_IKEPSK_MD5 = 5300,
|
||||
@ -1325,6 +1323,14 @@ typedef enum kern_type
|
||||
KERN_TYPE_WPA_PMKID_PBKDF2 = 16800,
|
||||
KERN_TYPE_WPA_PMKID_PMK = 16801,
|
||||
KERN_TYPE_ANSIBLE_VAULT = 16900,
|
||||
KERN_TYPE_SHA3_224 = 17300,
|
||||
KERN_TYPE_SHA3_256 = 17400,
|
||||
KERN_TYPE_SHA3_384 = 17500,
|
||||
KERN_TYPE_SHA3_512 = 17600,
|
||||
KERN_TYPE_KECCAK_224 = 17700,
|
||||
KERN_TYPE_KECCAK_256 = 17800,
|
||||
KERN_TYPE_KECCAK_384 = 17900,
|
||||
KERN_TYPE_KECCAK_512 = 18000,
|
||||
KERN_TYPE_TOTP_HMACSHA1 = 18100,
|
||||
KERN_TYPE_PLAINTEXT = 99999,
|
||||
|
||||
@ -1419,7 +1425,10 @@ int des_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_bu
|
||||
int episerver_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int postgresql_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int netscreen_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int keccak_224_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int keccak_256_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int keccak_384_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int keccak_512_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int blake2b_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int chacha20_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
int lm_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig);
|
||||
|
@ -403,6 +403,7 @@ typedef enum opts_type
|
||||
OPTS_TYPE_AUX4 = (1ULL << 37),
|
||||
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 38),
|
||||
OPTS_TYPE_PREFERED_THREAD = (1ULL << 39), // some algorithms (complicated ones with many branches) benefit from this
|
||||
OPTS_TYPE_PT_ADD06 = (1ULL << 40),
|
||||
|
||||
} opts_type_t;
|
||||
|
||||
@ -724,8 +725,6 @@ typedef struct salt
|
||||
u32 salt_iter2;
|
||||
u32 salt_sign[2];
|
||||
|
||||
u32 keccak_mdlen;
|
||||
|
||||
u32 digests_cnt;
|
||||
u32 digests_done;
|
||||
|
||||
|
@ -118,7 +118,7 @@ endif # MSYS2
|
||||
## You have your own headers somewhere, for example: apt-get install opencl-headers
|
||||
##
|
||||
|
||||
OPENCL_HEADERS_KHRONOS := deps/OpenCL-Headers
|
||||
OPENCL_HEADERS_KHRONOS := deps/git/OpenCL-Headers
|
||||
|
||||
##
|
||||
## Cross compiler paths
|
||||
|
404
src/interface.c
404
src/interface.c
@ -137,7 +137,6 @@ static const char *ST_HASH_04522 = "9038129c474caa3f0de56f38db84033d0fe1d4b8:365
|
||||
static const char *ST_HASH_04700 = "92d85978d884eb1d99a51652b1139c8279fa8663";
|
||||
static const char *ST_HASH_04800 = "aa4aaa1d52319525023c06a4873f4c51:35343534373533343633383832343736:dc";
|
||||
static const char *ST_HASH_04900 = "75d280ca9a0c2ee18729603104ead576d9ca6285:347070";
|
||||
static const char *ST_HASH_05000 = "203f88777f18bb4ee1226627b547808f38d90d3e106262b5de9ca943b57137b6";
|
||||
static const char *ST_HASH_05100 = "8743b52063cd8409";
|
||||
static const char *ST_HASH_05200 = "50575333e4e2a590a5e5c8269f57ec04a8a1c0c03da55b311c51236dab8c6b96b0afca02000800005eaeee20c6cc10d5caa6522b3ca545c41d9133d630ca08f467b7aae8a2bbef51aa2df968d10b9c4cfb17a182c0add7acb8c153794f51337e12f472f451d10e6dcac664ed760606aabdbb6b794a80d6ce2a330100c76de0ff961a45cca21576b893d826c52f272b97cdf48aca6fbe6c74b039f81c61b7d632fb6fddd9f96162ab1effd69a4598a331e855e38792e5365272d4791bf991d248e1585a9ad20ea3d77b5d2ef9a711ef90a70ec6991cb578f1b8bdaa9efa7b0039e9ea96f777491713047bdd99fa1d78f06f23406a66046b387d3034e46b1f84129bba853cc18fa49f107dc0290547258d30566a4b1b363ff4c1c16cb2f5f400059833d4b651bfa508200cbdc7a75fc57ef90eb1d90b0deea8505753332d454f46505753332d454f466236710e2e2477878e738b60d0aa2834a96b01e97764fe980243a06ad16939d1";
|
||||
static const char *ST_HASH_05300 = "50503326cac6e4bd892b8257805b5a59a285f464ad3f63dc01bd0335f8341ef52e00be0b8cb205422a3788f021e4e6e8ccbe34784bc85abe42f62545bac64888426a2f1264fa28cf384ff00b14cfa5eff562dda4fad2a31fd7a6715218cff959916deed856feea5bee2e773241c5fbebf202958f0ce0c432955e0f1f6d1259da:688a7bfa8d5819630a970ed6d27018021a15fbb3e2fdcc36ce9b563d8ff95f510c4b3236c014d1cde9c2f1a999b121bc3ab1bc8049c8ac1e8c167a84f53c867492723eb01ab4b38074b38f4297d6fea8f44e01ea828fce33c433430938b1551f60673ce8088e7d2f41e3b49315344046fefee1e3860064331417562761db3ba4:c66606d691eaade4:8bdc88a2cdb4a1cf:c3b13137fae9f66684d98709939e5c3454ee31a98c80a1c76427d805b5dea866eff045515e8fb42dd259b9448caba9d937f4b3b75ec1b092a92232b4c8c1e70a60a52076e907f887b731d0f66e19e09b535238169c74c04a4b393f9b815c54eef4558cd8a22c9018bb4f24ee6db0e32979f9a353361cdba948f9027551ee40b1c96ba81c28aa3e1a0fac105dc469efa83f6d3ee281b945c6fa8b4677bac26dda:53f757c5b08afad6:aa02d9289e1702e5d7ed1e4ebf35ab31c2688e00:aab8580015cf545ac0b7291d15a4f2c79e06defd:944a0df3939f3bd281c9d05fbc0e3d30";
|
||||
@ -281,6 +280,14 @@ static const char *ST_HASH_16700 = "$fvde$1$16$84286044060108438487434858307513$
|
||||
static const char *ST_HASH_16800 = "2582a8281bf9d4308d6f5731d0e61c61*4604ba734d4e*89acf0e761f4*ed487162465a774bfba60eb603a39f3a";
|
||||
static const char *ST_HASH_16801 = "2582a8281bf9d4308d6f5731d0e61c61*4604ba734d4e*89acf0e761f4";
|
||||
static const char *ST_HASH_16900 = "$ansible$0*0*6b761adc6faeb0cc0bf197d3d4a4a7d3f1682e4b169cae8fa6b459b3214ed41e*426d313c5809d4a80a4b9bc7d4823070*d8bad190c7fbc7c3cb1c60a27abfb0ff59d6fb73178681c7454d94a0f56a4360";
|
||||
static const char *ST_HASH_17300 = "412ef78534ba6ab0e9b1607d3e9767a25c1ea9d5e83176b4c2817a6c";
|
||||
static const char *ST_HASH_17400 = "d60fcf6585da4e17224f58858970f0ed5ab042c3916b76b0b828e62eaf636cbd";
|
||||
static const char *ST_HASH_17500 = "983ba28532cc6320d04f20fa485bcedb38bddb666eca5f1e5aa279ff1c6244fe5f83cf4bbf05b95ff378dd2353617221";
|
||||
static const char *ST_HASH_17600 = "7c2dc1d743735d4e069f3bda85b1b7e9172033dfdd8cd599ca094ef8570f3930c3f2c0b7afc8d6152ce4eaad6057a2ff22e71934b3a3dd0fb55a7fc84a53144e";
|
||||
static const char *ST_HASH_17700 = "e1dfad9bafeae6ef15f5bbb16cf4c26f09f5f1e7870581962fc84636";
|
||||
static const char *ST_HASH_17800 = "203f88777f18bb4ee1226627b547808f38d90d3e106262b5de9ca943b57137b6";
|
||||
static const char *ST_HASH_17900 = "5804b7ada5806ba79540100e9a7ef493654ff2a21d94d4f2ce4bf69abda5d94bf03701fe9525a15dfdc625bfbd769701";
|
||||
static const char *ST_HASH_18000 = "2fbf5c9080f0a704de2e915ba8fdae6ab00bbc026b2c1c8fa07da1239381c6b7f4dfd399bf9652500da723694a4c719587dd0219cb30eabe61210a8ae4dc0b03";
|
||||
static const char *ST_HASH_18100 = "597056:3600";
|
||||
static const char *ST_HASH_99999 = "hashcat";
|
||||
|
||||
@ -369,8 +376,8 @@ static const char *HT_00600 = "BLAKE2b";
|
||||
static const char *HT_00900 = "MD4";
|
||||
static const char *HT_01000 = "NTLM";
|
||||
static const char *HT_01100 = "Domain Cached Credentials (DCC), MS Cache";
|
||||
static const char *HT_01300 = "SHA-224";
|
||||
static const char *HT_01400 = "SHA-256";
|
||||
static const char *HT_01300 = "SHA2-224";
|
||||
static const char *HT_01400 = "SHA2-256";
|
||||
static const char *HT_01410 = "sha256($pass.$salt)";
|
||||
static const char *HT_01420 = "sha256($salt.$pass)";
|
||||
static const char *HT_01430 = "sha256(utf16le($pass).$salt)";
|
||||
@ -379,7 +386,7 @@ static const char *HT_01450 = "HMAC-SHA256 (key = $pass)";
|
||||
static const char *HT_01460 = "HMAC-SHA256 (key = $salt)";
|
||||
static const char *HT_01500 = "descrypt, DES (Unix), Traditional DES";
|
||||
static const char *HT_01600 = "Apache $apr1$ MD5, md5apr1, MD5 (APR)";
|
||||
static const char *HT_01700 = "SHA-512";
|
||||
static const char *HT_01700 = "SHA2-512";
|
||||
static const char *HT_01710 = "sha512($pass.$salt)";
|
||||
static const char *HT_01720 = "sha512($salt.$pass)";
|
||||
static const char *HT_01730 = "sha512(utf16le($pass).$salt)";
|
||||
@ -409,7 +416,6 @@ static const char *HT_04520 = "sha1($salt.sha1($pass))";
|
||||
static const char *HT_04700 = "sha1(md5($pass))";
|
||||
static const char *HT_04800 = "iSCSI CHAP authentication, MD5(CHAP)";
|
||||
static const char *HT_04900 = "sha1($salt.$pass.$salt)";
|
||||
static const char *HT_05000 = "SHA-3 (Keccak)";
|
||||
static const char *HT_05100 = "Half MD5";
|
||||
static const char *HT_05200 = "Password Safe v3";
|
||||
static const char *HT_05300 = "IKE-PSK MD5";
|
||||
@ -472,7 +478,7 @@ static const char *HT_10420 = "PDF 1.1 - 1.3 (Acrobat 2 - 4), collider #2";
|
||||
static const char *HT_10500 = "PDF 1.4 - 1.6 (Acrobat 5 - 8)";
|
||||
static const char *HT_10600 = "PDF 1.7 Level 3 (Acrobat 9)";
|
||||
static const char *HT_10700 = "PDF 1.7 Level 8 (Acrobat 10 - 11)";
|
||||
static const char *HT_10800 = "SHA-384";
|
||||
static const char *HT_10800 = "SHA2-384";
|
||||
static const char *HT_10900 = "PBKDF2-HMAC-SHA256";
|
||||
static const char *HT_11000 = "PrestaShop";
|
||||
static const char *HT_11100 = "PostgreSQL CRAM (MD5)";
|
||||
@ -530,6 +536,14 @@ static const char *HT_16700 = "FileVault 2";
|
||||
static const char *HT_16800 = "WPA-PMKID-PBKDF2";
|
||||
static const char *HT_16801 = "WPA-PMKID-PMK";
|
||||
static const char *HT_16900 = "Ansible Vault";
|
||||
static const char *HT_17300 = "SHA3-224";
|
||||
static const char *HT_17400 = "SHA3-256";
|
||||
static const char *HT_17500 = "SHA3-384";
|
||||
static const char *HT_17600 = "SHA3-512";
|
||||
static const char *HT_17700 = "Keccak-224";
|
||||
static const char *HT_17800 = "Keccak-256";
|
||||
static const char *HT_17900 = "Keccak-384";
|
||||
static const char *HT_18000 = "Keccak-512";
|
||||
static const char *HT_18100 = "TOTP (HMAC-SHA1)";
|
||||
static const char *HT_99999 = "Plaintext";
|
||||
|
||||
@ -6222,18 +6236,16 @@ int sha512crypt_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYB
|
||||
return (PARSER_OK);
|
||||
}
|
||||
|
||||
int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
|
||||
int keccak_224_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
|
||||
{
|
||||
u64 *digest = (u64 *) hash_buf->digest;
|
||||
|
||||
salt_t *salt = hash_buf->salt;
|
||||
u32 *digest = (u32 *) hash_buf->digest;
|
||||
|
||||
token_t token;
|
||||
|
||||
token.token_cnt = 1;
|
||||
|
||||
token.len_min[0] = 16;
|
||||
token.len_max[0] = 400;
|
||||
token.len_min[0] = 56;
|
||||
token.len_max[0] = 56;
|
||||
token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
|
||||
| TOKEN_ATTR_VERIFY_HEX;
|
||||
|
||||
@ -6244,16 +6256,111 @@ int keccak_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNU
|
||||
u8 *hash_pos = token.buf[0];
|
||||
int hash_len = token.len[0];
|
||||
|
||||
if (hash_len % 16) return (PARSER_GLOBAL_LENGTH);
|
||||
if (hash_len != 56) return (PARSER_GLOBAL_LENGTH);
|
||||
|
||||
u32 keccak_mdlen = hash_len / 2;
|
||||
digest[0] = hex_to_u32 (hash_pos + 0);
|
||||
digest[1] = hex_to_u32 (hash_pos + 8);
|
||||
digest[2] = hex_to_u32 (hash_pos + 16);
|
||||
digest[3] = hex_to_u32 (hash_pos + 24);
|
||||
digest[4] = hex_to_u32 (hash_pos + 32);
|
||||
digest[5] = hex_to_u32 (hash_pos + 40);
|
||||
digest[6] = hex_to_u32 (hash_pos + 48);
|
||||
|
||||
for (u32 i = 0, j = 0; i < keccak_mdlen / 8; i += 1, j += 16)
|
||||
{
|
||||
digest[i] = hex_to_u64 (hash_pos + j);
|
||||
}
|
||||
return (PARSER_OK);
|
||||
}
|
||||
|
||||
salt->keccak_mdlen = keccak_mdlen;
|
||||
/**
 * Parse a Keccak-256 hash line into hash_buf->digest.
 *
 * Requests a single token of exactly 64 characters from input_tokenizer (),
 * flagged TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX, then stores four
 * u64 words obtained with hex_to_u64 () at 16-character strides.
 *
 * Returns the tokenizer's code if it is not PARSER_OK, PARSER_GLOBAL_LENGTH
 * if the token length is not 64, otherwise PARSER_OK.
 */
int keccak_256_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
  token_t token;

  token.token_cnt  = 1;

  token.len_min[0] = 64;
  token.len_max[0] = 64;
  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
                   | TOKEN_ATTR_VERIFY_HEX;

  const int rc_tokenizer = input_tokenizer (input_buf, input_len, &token);

  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);

  const int hex_len = token.len[0];

  if (hex_len != 64) return (PARSER_GLOBAL_LENGTH);

  u8  *hex    = token.buf[0];
  u64 *digest = (u64 *) hash_buf->digest;

  // one u64 digest word per 16 input characters
  for (int word = 0; word < 4; word++)
  {
    digest[word] = hex_to_u64 (hex + (word * 16));
  }

  return (PARSER_OK);
}
|
||||
|
||||
/**
 * Parse a Keccak-384 hash line into hash_buf->digest.
 *
 * Requests a single token of exactly 96 characters from input_tokenizer (),
 * flagged TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX, then stores six
 * u64 words obtained with hex_to_u64 () at 16-character strides.
 *
 * Returns the tokenizer's code if it is not PARSER_OK, PARSER_GLOBAL_LENGTH
 * if the token length is not 96, otherwise PARSER_OK.
 */
int keccak_384_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
  token_t token;

  token.token_cnt  = 1;

  token.len_min[0] = 96;
  token.len_max[0] = 96;
  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
                   | TOKEN_ATTR_VERIFY_HEX;

  const int rc_tokenizer = input_tokenizer (input_buf, input_len, &token);

  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);

  const int hex_len = token.len[0];

  if (hex_len != 96) return (PARSER_GLOBAL_LENGTH);

  u8  *hex    = token.buf[0];
  u64 *digest = (u64 *) hash_buf->digest;

  // one u64 digest word per 16 input characters
  for (int word = 0; word < 6; word++)
  {
    digest[word] = hex_to_u64 (hex + (word * 16));
  }

  return (PARSER_OK);
}
|
||||
|
||||
/**
 * Parse a 512-bit digest supplied as exactly 128 hexadecimal characters.
 *
 * The whole input line is tokenized as one fixed-length, hex-verified token.
 * Each run of 16 hex characters is converted with hex_to_u64 () and stored,
 * in order, into the eight 64-bit words of hash_buf->digest.
 *
 * Returns PARSER_OK on success, the tokenizer's return code when tokenizing
 * fails, or PARSER_GLOBAL_LENGTH when the token is not 128 characters long.
 */
int keccak_512_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_UNUSED hashconfig_t *hashconfig)
{
  token_t token;

  // a single token spanning the complete hex digest

  token.token_cnt  = 1;

  token.len_min[0] = 128;
  token.len_max[0] = 128;
  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX;

  const int parse_rc = input_tokenizer (input_buf, input_len, &token);

  if (parse_rc != PARSER_OK) return (parse_rc);

  const int digest_hex_len = token.len[0];

  if (digest_hex_len != 128) return (PARSER_GLOBAL_LENGTH);

  u8  *digest_hex = token.buf[0];
  u64 *out_words  = (u64 *) hash_buf->digest;

  // 128 hex characters = 8 words of 16 hex characters each

  for (int word = 0; word < 8; word++)
  {
    out_words[word] = hex_to_u64 (digest_hex + (word * 16));
  }

  return (PARSER_OK);
}
|
||||
@ -18305,7 +18412,6 @@ const char *strhashtype (const u32 hash_mode)
|
||||
case 4700: return HT_04700;
|
||||
case 4800: return HT_04800;
|
||||
case 4900: return HT_04900;
|
||||
case 5000: return HT_05000;
|
||||
case 5100: return HT_05100;
|
||||
case 5200: return HT_05200;
|
||||
case 5300: return HT_05300;
|
||||
@ -18457,6 +18563,14 @@ const char *strhashtype (const u32 hash_mode)
|
||||
case 16800: return HT_16800;
|
||||
case 16801: return HT_16801;
|
||||
case 16900: return HT_16900;
|
||||
case 17300: return HT_17300;
|
||||
case 17400: return HT_17400;
|
||||
case 17500: return HT_17500;
|
||||
case 17600: return HT_17600;
|
||||
case 17700: return HT_17700;
|
||||
case 17800: return HT_17800;
|
||||
case 17900: return HT_17900;
|
||||
case 18000: return HT_18000;
|
||||
case 18100: return HT_18100;
|
||||
case 99999: return HT_99999;
|
||||
}
|
||||
@ -22173,6 +22287,56 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
|
||||
byte_swap_32 (digest_buf[6]),
|
||||
byte_swap_32 (digest_buf[7]));
|
||||
}
|
||||
else if (hash_mode == 17300 || hash_mode == 17700)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x",
|
||||
ptr[1], ptr[0],
|
||||
ptr[3], ptr[2],
|
||||
ptr[5], ptr[4],
|
||||
ptr[7]
|
||||
);
|
||||
}
|
||||
else if (hash_mode == 17400 || hash_mode == 17800)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x",
|
||||
ptr[1], ptr[0],
|
||||
ptr[3], ptr[2],
|
||||
ptr[5], ptr[4],
|
||||
ptr[7], ptr[6]
|
||||
);
|
||||
}
|
||||
else if (hash_mode == 17500 || hash_mode == 17900)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
|
||||
ptr[ 1], ptr[ 0],
|
||||
ptr[ 3], ptr[ 2],
|
||||
ptr[ 5], ptr[ 4],
|
||||
ptr[ 7], ptr[ 6],
|
||||
ptr[ 9], ptr[ 8],
|
||||
ptr[11], ptr[10]
|
||||
);
|
||||
}
|
||||
else if (hash_mode == 17600 || hash_mode == 18000)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
|
||||
ptr[ 1], ptr[ 0],
|
||||
ptr[ 3], ptr[ 2],
|
||||
ptr[ 5], ptr[ 4],
|
||||
ptr[ 7], ptr[ 6],
|
||||
ptr[ 9], ptr[ 8],
|
||||
ptr[11], ptr[10],
|
||||
ptr[13], ptr[12],
|
||||
ptr[15], ptr[14]
|
||||
);
|
||||
}
|
||||
else if (hash_mode == 18100)
|
||||
{
|
||||
// salt_buf[1] holds our 32 bit value. salt_buf[0] and salt_buf[1] would be 64 bits.
|
||||
@ -22284,40 +22448,6 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%s$%s", (char *) salt.salt_sign, tmp_buf);
|
||||
}
|
||||
else if (hash_type == HASH_TYPE_KECCAK)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
|
||||
snprintf (out_buf, out_len - 1, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x",
|
||||
ptr[ 1], ptr[ 0],
|
||||
ptr[ 3], ptr[ 2],
|
||||
ptr[ 5], ptr[ 4],
|
||||
ptr[ 7], ptr[ 6],
|
||||
ptr[ 9], ptr[ 8],
|
||||
ptr[11], ptr[10],
|
||||
ptr[13], ptr[12],
|
||||
ptr[15], ptr[14],
|
||||
ptr[17], ptr[16],
|
||||
ptr[19], ptr[18],
|
||||
ptr[21], ptr[20],
|
||||
ptr[23], ptr[22],
|
||||
ptr[25], ptr[24],
|
||||
ptr[27], ptr[26],
|
||||
ptr[29], ptr[28],
|
||||
ptr[31], ptr[30],
|
||||
ptr[33], ptr[32],
|
||||
ptr[35], ptr[34],
|
||||
ptr[37], ptr[36],
|
||||
ptr[39], ptr[38],
|
||||
ptr[41], ptr[30],
|
||||
ptr[43], ptr[42],
|
||||
ptr[45], ptr[44],
|
||||
ptr[47], ptr[46],
|
||||
ptr[49], ptr[48]
|
||||
);
|
||||
|
||||
out_buf[salt.keccak_mdlen * 2] = 0;
|
||||
}
|
||||
else if (hash_type == HASH_TYPE_BLAKE2B)
|
||||
{
|
||||
u32 *ptr = digest_buf;
|
||||
@ -24582,25 +24712,6 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 5000: hashconfig->hash_type = HASH_TYPE_KECCAK;
|
||||
hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->kern_type = KERN_TYPE_KECCAK;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_05000;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 5100: hashconfig->hash_type = HASH_TYPE_MD5H;
|
||||
hashconfig->salt_type = SALT_TYPE_NONE;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
@ -27378,6 +27489,150 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17300: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD06;
|
||||
hashconfig->kern_type = KERN_TYPE_SHA3_224;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_224_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17300;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17400: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD06;
|
||||
hashconfig->kern_type = KERN_TYPE_SHA3_256;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_256_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17400;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17500: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD06;
|
||||
hashconfig->kern_type = KERN_TYPE_SHA3_384;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_384_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17500;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17600: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD06;
|
||||
hashconfig->kern_type = KERN_TYPE_SHA3_512;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_512_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17600;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17700: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->kern_type = KERN_TYPE_KECCAK_224;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_224_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17700;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17800: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->kern_type = KERN_TYPE_KECCAK_256;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_256_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17800;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 17900: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->kern_type = KERN_TYPE_KECCAK_384;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_384_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_17900;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 18000: hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE
|
||||
| OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->kern_type = KERN_TYPE_KECCAK_512;
|
||||
hashconfig->dgst_size = DGST_SIZE_8_25;
|
||||
hashconfig->parse_func = keccak_512_parse_hash;
|
||||
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_RAW_HASH;
|
||||
hashconfig->dgst_pos0 = 6;
|
||||
hashconfig->dgst_pos1 = 7;
|
||||
hashconfig->dgst_pos2 = 4;
|
||||
hashconfig->dgst_pos3 = 5;
|
||||
hashconfig->st_hash = ST_HASH_18000;
|
||||
hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN;
|
||||
break;
|
||||
|
||||
case 18100: hashconfig->hash_type = HASH_TYPE_SHA1;
|
||||
hashconfig->salt_type = SALT_TYPE_EMBEDDED;
|
||||
hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL;
|
||||
@ -27498,6 +27753,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_UTF16BE;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD01;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD02;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD06;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADD80;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADDBITS14;
|
||||
hashconfig->opts_type &= ~OPTS_TYPE_PT_ADDBITS15;
|
||||
@ -28387,8 +28643,6 @@ void hashconfig_benchmark_defaults (hashcat_ctx_t *hashcat_ctx, salt_t *salt, vo
|
||||
break;
|
||||
case 3100: salt->salt_len = 1;
|
||||
break;
|
||||
case 5000: salt->keccak_mdlen = 32;
|
||||
break;
|
||||
case 5800: salt->salt_len = 16;
|
||||
break;
|
||||
case 6800: salt->salt_len = 32;
|
||||
|
@ -205,6 +205,14 @@ static void main_outerloop_finished (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
|
||||
hashcat_user_t *hashcat_user = hashcat_ctx->hashcat_user;
|
||||
status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
|
||||
|
||||
// we should never stop hashcat with STATUS_INIT:
|
||||
// keypress thread blocks on STATUS_INIT forever!
|
||||
|
||||
if (status_ctx->devices_status == STATUS_INIT)
|
||||
{
|
||||
status_ctx->devices_status = STATUS_ERROR;
|
||||
}
|
||||
|
||||
// wait for outer threads
|
||||
|
||||
status_ctx->shutdown_outer = true;
|
||||
|
18
src/mpsp.c
18
src/mpsp.c
@ -268,7 +268,12 @@ static int mp_expand (hashcat_ctx_t *hashcat_ctx, const char *in_buf, size_t in_
|
||||
{
|
||||
in_pos++;
|
||||
|
||||
if (in_pos == in_len) break;
|
||||
if (in_pos == in_len)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "Syntax error in mask: %s", in_buf);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
u32 p1 = in_buf[in_pos] & 0xff;
|
||||
|
||||
@ -306,7 +311,7 @@ static int mp_expand (hashcat_ctx_t *hashcat_ctx, const char *in_buf, size_t in_
|
||||
break;
|
||||
case '?': rc = mp_add_cs_buf (hashcat_ctx, &p0, 1, mp_usr, mp_usr_offset);
|
||||
break;
|
||||
default: event_log_error (hashcat_ctx, "Syntax error: %s", in_buf);
|
||||
default: event_log_error (hashcat_ctx, "Syntax error in mask: %s", in_buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -372,7 +377,12 @@ static int mp_gen_css (hashcat_ctx_t *hashcat_ctx, char *mask_buf, size_t mask_l
|
||||
{
|
||||
mask_pos++;
|
||||
|
||||
if (mask_pos == mask_len) break;
|
||||
if (mask_pos == mask_len)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "Syntax error in mask: %s", mask_buf);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
char p1 = mask_buf[mask_pos];
|
||||
|
||||
@ -412,7 +422,7 @@ static int mp_gen_css (hashcat_ctx_t *hashcat_ctx, char *mask_buf, size_t mask_l
|
||||
break;
|
||||
case '?': rc = mp_add_cs_buf (hashcat_ctx, &chr, 1, css_buf, css_pos);
|
||||
break;
|
||||
default: event_log_error (hashcat_ctx, "Syntax error: %s", mask_buf);
|
||||
default: event_log_error (hashcat_ctx, "Syntax error in mask: %s", mask_buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
21
src/opencl.c
21
src/opencl.c
@ -35,6 +35,7 @@ static const char *drm_card0_driver_path = "/sys/class/drm/card0/device/driver";
|
||||
#endif
|
||||
|
||||
static const u32 full01 = 0x01010101;
|
||||
static const u32 full06 = 0x06060606;
|
||||
static const u32 full80 = 0x80808080;
|
||||
|
||||
static double TARGET_MSEC_PROFILE[4] = { 2, 12, 96, 480 };
|
||||
@ -2153,6 +2154,10 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
|
||||
}
|
||||
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x06);
|
||||
}
|
||||
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
|
||||
@ -2165,6 +2170,10 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
|
||||
}
|
||||
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x06);
|
||||
}
|
||||
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
|
||||
{
|
||||
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
|
||||
@ -2472,6 +2481,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
|
||||
ptr[line_len] = 0x80;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
ptr[line_len] = 0x06;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
|
||||
{
|
||||
ptr[line_len] = 0x01;
|
||||
@ -2590,6 +2604,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
|
||||
ptr[line_len] = 0x80;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
ptr[line_len] = 0x06;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
|
||||
{
|
||||
ptr[line_len] = 0x01;
|
||||
@ -6313,6 +6332,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
device_param->kernel_params_mp_buf32[7] = 0;
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1;
|
||||
@ -6337,6 +6357,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
device_param->kernel_params_mp_l_buf32[8] = 0;
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1;
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1;
|
||||
|
@ -126,6 +126,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
comb_ptr[comb.pw_len] = 0x01;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
comb_ptr[comb.pw_len] = 0x06;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
|
||||
{
|
||||
comb_ptr[comb.pw_len] = 0x80;
|
||||
@ -264,6 +269,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
pw_ptr[new_pass_len] = 0x01;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06)
|
||||
{
|
||||
pw_ptr[new_pass_len] = 0x06;
|
||||
}
|
||||
|
||||
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
|
||||
{
|
||||
pw_ptr[new_pass_len] = 0x80;
|
||||
|
17
src/usage.c
17
src/usage.c
@ -125,11 +125,18 @@ static const char *const USAGE_BIG[] =
|
||||
" 0 | MD5 | Raw Hash",
|
||||
" 5100 | Half MD5 | Raw Hash",
|
||||
" 100 | SHA1 | Raw Hash",
|
||||
" 1300 | SHA-224 | Raw Hash",
|
||||
" 1400 | SHA-256 | Raw Hash",
|
||||
" 10800 | SHA-384 | Raw Hash",
|
||||
" 1700 | SHA-512 | Raw Hash",
|
||||
" 5000 | SHA-3 (Keccak) | Raw Hash",
|
||||
" 1300 | SHA2-224 | Raw Hash",
|
||||
" 1400 | SHA2-256 | Raw Hash",
|
||||
" 10800 | SHA2-384 | Raw Hash",
|
||||
" 1700 | SHA2-512 | Raw Hash",
|
||||
" 17300 | SHA3-224 | Raw Hash",
|
||||
" 17400 | SHA3-256 | Raw Hash",
|
||||
" 17500 | SHA3-384 | Raw Hash",
|
||||
" 17600 | SHA3-512 | Raw Hash",
|
||||
" 17700 | Keccak-224 | Raw Hash",
|
||||
" 17800 | Keccak-256 | Raw Hash",
|
||||
" 17900 | Keccak-384 | Raw Hash",
|
||||
" 18000 | Keccak-512 | Raw Hash",
|
||||
" 600 | BLAKE2b-512 | Raw Hash",
|
||||
" 10100 | SipHash | Raw Hash",
|
||||
" 6000 | RIPEMD-160 | Raw Hash",
|
||||
|
@ -977,7 +977,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (user_options->attack_mode == ATTACK_MODE_COMBI)
|
||||
{
|
||||
event_log_error (hashcat_ctx, "Custom charsets re not supported in attack mode 1 (combination).");
|
||||
event_log_error (hashcat_ctx, "Custom charsets are not supported in attack mode 1 (combination).");
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
## install help:
|
||||
##
|
||||
## cpan install Authen::Passphrase::LANManager Authen::Passphrase::MySQL323 Authen::Passphrase::NTHash Authen::Passphrase::PHPass Crypt::CBC Crypt::DES Crypt::Digest::RIPEMD160 Crypt::Digest::Whirlpool Crypt::ECB Crypt::Eksblowfish::Bcrypt Crypt::Mode::ECB Crypt::MySQL Crypt::OpenSSH::ChachaPoly Crypt::PBKDF2 Crypt::RC4 Crypt::Rijndael Crypt::ScryptKDF Crypt::Skip32 Crypt::Twofish Crypt::UnixCrypt_XS Digest::BLAKE2 Digest::CMAC Digest::CRC Digest::GOST Digest::HMAC Digest::HMAC_MD5 Digest::Keccak Digest::MD4 Digest::MD5 Digest::Perl::MD5 Digest::SHA Digest::SipHash JSON Net::DNS::RR::NSEC3 Net::DNS::SEC Convert::EBCDIC
|
||||
## cpan install Authen::Passphrase::LANManager Authen::Passphrase::MySQL323 Authen::Passphrase::NTHash Authen::Passphrase::PHPass Crypt::CBC Crypt::DES Crypt::Digest::RIPEMD160 Crypt::Digest::Whirlpool Crypt::ECB Crypt::Eksblowfish::Bcrypt Crypt::Mode::ECB Crypt::MySQL Crypt::OpenSSH::ChachaPoly Crypt::PBKDF2 Crypt::RC4 Crypt::Rijndael Crypt::ScryptKDF Crypt::Skip32 Crypt::Twofish Crypt::UnixCrypt_XS Digest::BLAKE2 Digest::CMAC Digest::CRC Digest::GOST Digest::HMAC Digest::HMAC_MD5 Digest::Keccak Digest::MD4 Digest::MD5 Digest::Perl::MD5 Digest::SHA Digest::SHA3 Digest::SipHash JSON Net::DNS::RR::NSEC3 Net::DNS::SEC Convert::EBCDIC
|
||||
##
|
||||
|
||||
use strict;
|
||||
@ -15,8 +15,9 @@ use warnings;
|
||||
use Digest::MD4 qw (md4 md4_hex);
|
||||
use Digest::MD5 qw (md5 md5_hex);
|
||||
use Digest::SHA qw (sha1 sha256 sha384 sha512 sha1_hex sha224_hex sha256_hex sha384_hex sha512_hex hmac_sha1 hmac_sha256 hmac_sha512);
|
||||
use Digest::SHA3 qw (sha3_224_hex sha3_256_hex sha3_384_hex sha3_512_hex);
|
||||
use Digest::Keccak qw (keccak_224_hex keccak_256_hex keccak_384_hex keccak_512_hex);
|
||||
use Digest::HMAC qw (hmac hmac_hex);
|
||||
use Digest::Keccak qw (keccak_256_hex);
|
||||
use Digest::BLAKE2 qw (blake2b_hex);
|
||||
use Crypt::MySQL qw (password41);
|
||||
use Digest::GOST qw (gost gost_hex);
|
||||
@ -58,7 +59,7 @@ my $hashcat = "./hashcat";
|
||||
|
||||
my $MAX_LEN = 55;
|
||||
|
||||
my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 600, 900, 1000, 1100, 1300, 1400, 1410, 1411, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4520, 4521, 4522, 4600, 4700, 4800, 4900, 5000, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7700, 7701, 7800, 7801, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12001, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100, 14400, 14700, 14800, 14900, 15000, 15100, 15200, 15300, 15400, 15500, 15600, 15700, 15900, 16000, 16100, 16200, 16300, 16400, 16500, 16600, 16700, 16800, 16900, 18100, 99999);
|
||||
my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 600, 900, 1000, 1100, 1300, 1400, 1410, 1411, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4520, 4521, 4522, 4600, 4700, 4800, 4900, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7700, 7701, 7800, 7801, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12001, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100, 14400, 14700, 14800, 14900, 15000, 15100, 15200, 15300, 15400, 15500, 15600, 15700, 15900, 16000, 16100, 16200, 16300, 16400, 16500, 16600, 16700, 16800, 16900, 17300, 17400, 17500, 17600, 17700, 17800, 17900, 18000, 18100, 99999);
|
||||
|
||||
my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
|
||||
my %less_fifteen = map { $_ => 1 } qw (500 1600 1800 3200 6300 7400 10500 10700);
|
||||
@ -226,7 +227,7 @@ sub verify
|
||||
# remember always do "exists ($db->{$hash_in})" checks as soon as possible and don't forget it
|
||||
|
||||
# unsalted
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3000 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 5700 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 8600 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 16000 || $mode == 16400 || $mode == 99999)
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3000 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 5700 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 8600 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 16000 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
|
||||
{
|
||||
my $index = index ($line, ":");
|
||||
|
||||
@ -3547,7 +3548,7 @@ sub passthrough
|
||||
|
||||
my $tmp_hash;
|
||||
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 5700 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 99999)
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2400 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 6000 || $mode == 6100 || $mode == 6900 || $mode == 5700 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
|
||||
{
|
||||
$tmp_hash = gen_hash ($mode, $word_buf, "");
|
||||
}
|
||||
@ -4077,7 +4078,7 @@ sub single
|
||||
{
|
||||
my $mode = $modes[$j];
|
||||
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5000 || $mode == 5100 || $mode == 5300 || $mode == 5400 || $mode == 6000 || $mode == 6100 || $mode == 6600 || $mode == 6900 || $mode == 5700 || $mode == 8200 || $mode == 8300 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 99999)
|
||||
if ($mode == 0 || $mode == 100 || $mode == 101 || $mode == 133 || $mode == 200 || $mode == 300 || $mode == 600 || $mode == 900 || $mode == 1000 || $mode == 1300 || $mode == 1400 || $mode == 1700 || $mode == 2600 || $mode == 3500 || $mode == 4300 || $mode == 4400 || $mode == 4500 || $mode == 4600 || $mode == 4700 || $mode == 5100 || $mode == 5300 || $mode == 5400 || $mode == 6000 || $mode == 6100 || $mode == 6600 || $mode == 6900 || $mode == 5700 || $mode == 8200 || $mode == 8300 || $mode == 9900 || $mode == 10800 || $mode == 11500 || $mode == 13300 || $mode == 16400 || $mode == 17300 || $mode == 17400 || $mode == 17500 || $mode == 17600 || $mode == 17700 || $mode == 17800 || $mode == 17900 || $mode == 18000 || $mode == 99999)
|
||||
{
|
||||
for (my $i = 1; $i < 32; $i++)
|
||||
{
|
||||
@ -6078,12 +6079,6 @@ sub gen_hash
|
||||
|
||||
$tmp_hash = sprintf ("%s:%s", $hash_buf, $salt_buf);
|
||||
}
|
||||
elsif ($mode == 5000)
|
||||
{
|
||||
$hash_buf = keccak_256_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 5100)
|
||||
{
|
||||
my $pos;
|
||||
@ -9929,25 +9924,68 @@ END_CODE
|
||||
|
||||
$tmp_hash = sprintf ("%s*%s*%s*%s", substr ($pmkid, 0, 32), $macap, $macsta, $essid);
|
||||
}
|
||||
elsif ($mode == 17300)
|
||||
{
|
||||
$hash_buf = sha3_224_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17400)
|
||||
{
|
||||
$hash_buf = sha3_256_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17500)
|
||||
{
|
||||
$hash_buf = sha3_384_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17600)
|
||||
{
|
||||
$hash_buf = sha3_512_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17700)
|
||||
{
|
||||
$hash_buf = keccak_224_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17800)
|
||||
{
|
||||
$hash_buf = keccak_256_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 17900)
|
||||
{
|
||||
$hash_buf = keccak_384_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 18000)
|
||||
{
|
||||
$hash_buf = keccak_512_hex ($word_buf);
|
||||
|
||||
$tmp_hash = sprintf ("%s", $hash_buf);
|
||||
}
|
||||
elsif ($mode == 18100)
|
||||
{
|
||||
my $paddedTime = sprintf("%016x", int(int($salt_buf) / 30));
|
||||
my $data = pack('H*', $paddedTime);
|
||||
my $key = $word_buf;
|
||||
|
||||
#my $b32_salt_buf = encode_base32($salt_buf);
|
||||
#print "SECRET: ". $b32_salt_buf . ('=' x (8 - (length($b32_salt_buf) % 8))) ." ";
|
||||
$hash_buf = hmac_hex ($data, $key, \&sha1, 64);
|
||||
#$hash_buf = hmac ($word_buf, $salt_buf, \&sha1, 64);
|
||||
|
||||
my $offset = hex (substr ($hash_buf, -8)) & 0xf;
|
||||
$offset *= 2;
|
||||
my $token = hex (substr ($hash_buf, $offset, 8));
|
||||
$token &= 0x7fffffff;
|
||||
$token %= 1000000;
|
||||
#print "CODE: " . $token . "\n";
|
||||
|
||||
#$tmp_hash = sprintf ("%s:%s", $hash_buf, $salt_buf);
|
||||
## token must be leading zero padded, and salt leading zero stripped
|
||||
$tmp_hash = sprintf ("%06d:%d", $token, int($salt_buf));
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ TDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
# missing hash types: 5200,6251,6261,6271,6281
|
||||
|
||||
HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 600 900 1000 1100 1300 1400 1410 1411 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5000 5100 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7701 7800 7801 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12001 12100 12200 12300 12400 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14400 14600 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16900 99999"
|
||||
HASH_TYPES="0 10 11 12 20 21 22 23 30 40 50 60 100 101 110 111 112 120 121 122 125 130 131 132 133 140 141 150 160 200 300 400 500 600 900 1000 1100 1300 1400 1410 1411 1420 1430 1440 1441 1450 1460 1500 1600 1700 1710 1711 1720 1722 1730 1731 1740 1750 1760 1800 2100 2400 2410 2500 2600 2611 2612 2711 2811 3000 3100 3200 3710 3711 3800 3910 4010 4110 4300 4400 4500 4520 4521 4522 4700 4800 4900 5100 5300 5400 5500 5600 5700 5800 6000 6100 6211 6212 6213 6221 6222 6223 6231 6232 6233 6241 6242 6243 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 7300 7400 7500 7700 7701 7800 7801 7900 8000 8100 8200 8300 8400 8500 8600 8700 8900 9100 9200 9300 9400 9500 9600 9700 9800 9900 10000 10100 10200 10300 10400 10500 10600 10700 10800 10900 11000 11100 11200 11300 11400 11500 11600 11900 12000 12001 12100 12200 12300 12400 12600 12700 12800 12900 13000 13100 13200 13300 13400 13500 13600 13800 13900 14000 14100 14400 14600 14700 14800 14900 15000 15100 15200 15300 15400 15500 15600 15700 15900 16000 16100 16200 16300 16400 16500 16600 16700 16800 16900 17300 17400 17500 17600 17700 17800 17900 18000 99999"
|
||||
|
||||
#ATTACK_MODES="0 1 3 6 7"
|
||||
ATTACK_MODES="0 1 3 7"
|
||||
|
Loading…
Reference in New Issue
Block a user