Rewrite Whirlpool hash with 64-bit instructions

pull/2306/head
Jens Steube 4 years ago
parent 146ca73ff9
commit 633327d8b7

File diff suppressed because it is too large Load Diff

@ -9,18 +9,24 @@
#define R 10
/*
 * Table-lookup helpers, selected by VECT_SIZE.
 *
 * BOX(S,n,i) reads S[n][i]. For VECT_SIZE == 1 the index is used directly.
 * For VECT_SIZE 2/4/8/16 the index i is treated as a vector: every component
 * (.s0, .s1, ... up to .sf) is looked up on its own and the results are
 * packed back together with make_u32x.
 *
 * BOX64(S,n,i) performs the same per-component lookup but packs the results
 * with make_u64x.
 * NOTE(review): presumably intended for tables of 64-bit entries such as
 * s_MT - confirm against the callers.
 *
 * NOTE(review): each BOX definition is listed twice with identical text.
 * This looks like both sides of a rendered diff rather than intentional
 * code; an identical macro redefinition is accepted by the preprocessor.
 */
#if VECT_SIZE == 1
#define BOX(S,n,i) (S)[(n)][(i)]
#define BOX(S,n,i) (S)[(n)][(i)]
#define BOX64(S,n,i) (S)[(n)][(i)]
#elif VECT_SIZE == 2
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#define BOX64(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#elif VECT_SIZE == 4
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#define BOX64(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#elif VECT_SIZE == 8
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#define BOX64(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#elif VECT_SIZE == 16
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#define BOX(S,n,i) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#define BOX64(S,n,i) make_u64x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
/*
 * BOX_S / BOX64_S are defined outside the VECT_SIZE conditional, so they
 * always expand to a plain S[n][i] lookup whatever the vector width is.
 */
#define BOX_S(S,n,i) (S)[(n)][(i)]
#define BOX_S(S,n,i) (S)[(n)][(i)]
#define BOX64_S(S,n,i) (S)[(n)][(i)]
typedef struct whirlpool_ctx
{
@ -33,8 +39,8 @@ typedef struct whirlpool_ctx
int len;
SHM_TYPE u32 (*s_Ch)[256];
SHM_TYPE u32 (*s_Cl)[256];
SHM_TYPE u64 (*s_MT)[256];
SHM_TYPE u64 *s_RC;
} whirlpool_ctx_t;
@ -56,8 +62,8 @@ typedef struct whirlpool_ctx_vector
int len;
SHM_TYPE u32 (*s_Ch)[256];
SHM_TYPE u32 (*s_Cl)[256];
SHM_TYPE u64 (*s_MT)[256];
SHM_TYPE u64 *s_RC;
} whirlpool_ctx_vector_t;
@ -68,8 +74,8 @@ typedef struct whirlpool_hmac_ctx_vector
} whirlpool_hmac_ctx_vector_t;
DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len);
DECLSPEC void whirlpool_update (whirlpool_ctx_t *ctx, const u32 *w, const int len);
DECLSPEC void whirlpool_update_swap (whirlpool_ctx_t *ctx, const u32 *w, const int len);
@ -80,11 +86,11 @@ DECLSPEC void whirlpool_update_global_swap (whirlpool_ctx_t *ctx, GLOBAL_AS cons
DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx);
DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_update_64 (whirlpool_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len);
DECLSPEC void whirlpool_hmac_update (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len);
DECLSPEC void whirlpool_hmac_update_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len);
@ -95,8 +101,8 @@ DECLSPEC void whirlpool_hmac_update_global_swap (whirlpool_hmac_ctx_t *ctx, GLOB
DECLSPEC void whirlpool_hmac_update_global_utf16le (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void whirlpool_hmac_update_global_utf16le_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx);
DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whirlpool_ctx_t *ctx0);
DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len);
DECLSPEC void whirlpool_update_vector (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len);
@ -104,8 +110,8 @@ DECLSPEC void whirlpool_update_vector_swap (whirlpool_ctx_vector_t *ctx, const u
DECLSPEC void whirlpool_update_vector_utf16le (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len);
DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len);
DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx);
DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]);
DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC);
DECLSPEC void whirlpool_hmac_update_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len);
DECLSPEC void whirlpool_hmac_update_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len);
DECLSPEC void whirlpool_hmac_final_vector (whirlpool_hmac_ctx_vector_t *ctx);

@ -16,9 +16,9 @@
#include "inc_hash_whirlpool.cl"
#endif
/*
 * Adapter around whirlpool_transform_vector: accepts the message words as a
 * single pointer w and forwards them as four sub-pointers (w + 0, w + 4,
 * w + 8, w + 12) together with digest and the two lookup-table pointers.
 *
 * NOTE(review): two revisions are interleaved here. The signature and call
 * using s_Ch/s_Cl (u32 tables) appear to be the pre-change lines; the ones
 * using s_MT/s_RC (u64 tables) appear to be their replacement. Only one
 * signature/call pair can be live in a compilable file - confirm which.
 */
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC);
}
KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ())
@ -37,36 +37,32 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -147,7 +143,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ())
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}
@ -177,36 +173,32 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -299,7 +291,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ())
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}

@ -32,36 +32,32 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -85,7 +81,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ())
whirlpool_ctx_t ctx;
whirlpool_init (&ctx, s_Ch, s_Cl);
whirlpool_init (&ctx, s_MT, s_RC);
whirlpool_update_swap (&ctx, tmp.i, tmp.pw_len);
@ -116,36 +112,32 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -181,7 +173,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ())
whirlpool_ctx_t ctx;
whirlpool_init (&ctx, s_Ch, s_Cl);
whirlpool_init (&ctx, s_MT, s_RC);
whirlpool_update_swap (&ctx, tmp.i, tmp.pw_len);

@ -14,9 +14,9 @@
#include "inc_hash_whirlpool.cl"
#endif
/*
 * Adapter around whirlpool_transform_vector: splits the single word pointer
 * w into the four sub-pointers w + 0, w + 4, w + 8 and w + 12 and passes
 * them on with digest and the lookup-table pointers unchanged.
 *
 * NOTE(review): the s_Ch/s_Cl signature and call look like the old side of a
 * diff, the s_MT/s_RC ones like the new side; both are shown back to back.
 * Confirm which pair is current before relying on this text as source.
 */
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC);
}
KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
@ -35,36 +35,32 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -203,7 +199,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}
@ -233,36 +229,32 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -413,7 +405,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}

@ -30,36 +30,32 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -71,7 +67,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ())
whirlpool_ctx_t ctx0;
whirlpool_init (&ctx0, s_Ch, s_Cl);
whirlpool_init (&ctx0, s_MT, s_RC);
whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len);
@ -112,36 +108,32 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -165,7 +157,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ())
whirlpool_ctx_t ctx0;
whirlpool_init (&ctx0, s_Ch, s_Cl);
whirlpool_init (&ctx0, s_MT, s_RC);
whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len);

@ -14,12 +14,12 @@
#include "inc_hash_whirlpool.cl"
#endif
/*
 * Adapter around whirlpool_transform_vector: hands the word buffer w over as
 * four sub-pointers at offsets 0, 4, 8 and 12, and forwards digest plus the
 * lookup-table pointers as received.
 *
 * NOTE(review): old (s_Ch/s_Cl, u32 tables) and new (s_MT/s_RC, u64 tables)
 * versions of the signature and of the call are listed one after the other,
 * apparently from a diff view without +/- markers - confirm which is live.
 */
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC);
}
DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256])
DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
/**
* modifier
@ -82,13 +82,13 @@ DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}
}
DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256])
DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
/**
* modifier
@ -163,7 +163,7 @@ DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
dgst[14] = 0;
dgst[15] = 0;
whirlpool_transform_transport_vector (w, dgst, s_Ch, s_Cl);
whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC);
COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
}
@ -185,36 +185,32 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -258,7 +254,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ())
* main
*/
m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_Cl, s_Ch);
m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC);
}
KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ())
@ -277,36 +273,32 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -350,7 +342,7 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ())
* main
*/
m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_Cl, s_Ch);
m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC);
}
KERNEL_FQ void m06100_m16 (KERN_ATTR_BASIC ())
@ -373,36 +365,32 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -446,7 +434,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ())
* main
*/
m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_Cl, s_Ch);
m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC);
}
KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ())
@ -465,36 +453,32 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -538,7 +522,7 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ())
* main
*/
m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_Cl, s_Ch);
m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC);
}
KERNEL_FQ void m06100_s16 (KERN_ATTR_BASIC ())

@ -30,36 +30,32 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -94,7 +90,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ())
whirlpool_ctx_vector_t ctx;
whirlpool_init_vector (&ctx, s_Ch, s_Cl);
whirlpool_init_vector (&ctx, s_MT, s_RC);
whirlpool_update_vector (&ctx, w, pw_len);
@ -125,36 +121,32 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ())
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -201,7 +193,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ())
whirlpool_ctx_vector_t ctx;
whirlpool_init_vector (&ctx, s_Ch, s_Cl);
whirlpool_init_vector (&ctx, s_MT, s_RC);
whirlpool_update_vector (&ctx, w, pw_len);

@ -45,7 +45,7 @@ typedef struct tc_tmp
} tc_tmp_t;
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
@ -168,36 +168,32 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -269,7 +265,7 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -380,36 +376,32 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -515,7 +507,7 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -634,36 +626,32 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -45,7 +45,7 @@ typedef struct tc_tmp
} tc_tmp_t;
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
@ -168,36 +168,32 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -269,7 +265,7 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -380,36 +376,32 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -515,7 +507,7 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -634,36 +626,32 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -45,7 +45,7 @@ typedef struct tc_tmp
} tc_tmp_t;
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
@ -168,36 +168,32 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -269,7 +265,7 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -380,36 +376,32 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -515,7 +507,7 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -634,36 +626,32 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -86,7 +86,7 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
return -1;
}
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -105,7 +105,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -124,7 +124,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -160,7 +160,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -179,7 +179,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
@ -209,36 +209,32 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -310,7 +306,7 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -472,36 +468,32 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -638,7 +630,7 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -783,36 +775,32 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -137,7 +137,7 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
return -1;
}
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -156,7 +156,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -175,7 +175,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -211,7 +211,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -230,7 +230,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
@ -260,36 +260,32 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -361,7 +357,7 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -523,36 +519,32 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -689,7 +681,7 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -835,36 +827,32 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -202,7 +202,7 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32
return -1;
}
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC)
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -221,7 +221,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = ipad[14];
digest[15] = ipad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -240,7 +240,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = digest[ 0];
w0[1] = digest[ 1];
@ -276,7 +276,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
digest[14] = opad[14];
digest[15] = opad[15];
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
w0[0] = 0x80000000;
w0[1] = 0;
@ -295,7 +295,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x
w3[2] = 0;
w3[3] = (64 + 64) * 8;
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_Ch, s_Cl);
whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC);
}
KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
@ -325,36 +325,32 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -426,7 +422,7 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
whirlpool_hmac_ctx_t whirlpool_hmac_ctx;
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_Ch, s_Cl);
whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC);
tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0];
tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1];
@ -588,36 +584,32 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif
@ -754,7 +746,7 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
w3[2] = dgst[14];
w3[3] = dgst[15];
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_Ch, s_Cl);
hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC);
out[ 0] ^= dgst[ 0];
out[ 1] ^= dgst[ 1];
@ -901,36 +893,32 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t))
#ifdef REAL_SHM
LOCAL_VK u32 s_Ch[8][256];
LOCAL_VK u32 s_Cl[8][256];
LOCAL_VK u64 s_MT[8][256];
LOCAL_VK u64 s_RC[16];
for (u32 i = lid; i < 256; i += lsz)
{
s_Ch[0][i] = Ch[0][i];
s_Ch[1][i] = Ch[1][i];
s_Ch[2][i] = Ch[2][i];
s_Ch[3][i] = Ch[3][i];
s_Ch[4][i] = Ch[4][i];
s_Ch[5][i] = Ch[5][i];
s_Ch[6][i] = Ch[6][i];
s_Ch[7][i] = Ch[7][i];
s_Cl[0][i] = Cl[0][i];
s_Cl[1][i] = Cl[1][i];
s_Cl[2][i] = Cl[2][i];
s_Cl[3][i] = Cl[3][i];
s_Cl[4][i] = Cl[4][i];
s_Cl[5][i] = Cl[5][i];
s_Cl[6][i] = Cl[6][i];
s_Cl[7][i] = Cl[7][i];
s_MT[0][i] = MT[0][i];
s_MT[1][i] = MT[1][i];
s_MT[2][i] = MT[2][i];
s_MT[3][i] = MT[3][i];
s_MT[4][i] = MT[4][i];
s_MT[5][i] = MT[5][i];
s_MT[6][i] = MT[6][i];
s_MT[7][i] = MT[7][i];
}
for (u32 i = lid; i < 16; i += lsz)
{
s_RC[i] = RC[i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_Ch)[256] = Ch;
CONSTANT_AS u32a (*s_Cl)[256] = Cl;
CONSTANT_AS u64a (*s_MT)[256] = MT;
CONSTANT_AS u64a *s_RC = RC;
#endif

@ -41,26 +41,6 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig,
const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; }
const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; }
/**
 * Report whether the given device matches the known-problematic configuration
 * for this module.
 *
 * Returns true only when the device's OpenCL vendor id equals VENDOR_ID_AMD
 * and its has_vperm flag is false; returns false for every other device.
 * Only device_param is read; the remaining parameters are unused.
 */
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
  // Devices from any other vendor are never reported.
  if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) return false;

  // amdgpu-pro-19.30-934563-ubuntu-18.04: CL_OUT_OF_RESOURCES
  if (device_param->has_vperm == false) return true;

  return false;
}
/**
 * Produce the extra JIT build option string for this module.
 *
 * Formats the fixed text "-D NO_UNROLL" through hc_asprintf() and returns the
 * pointer it stored. None of the parameters are read. The result of the
 * hc_asprintf() call itself is not inspected here.
 *
 * NOTE(review): presumably the returned buffer is heap-allocated and must be
 * released by the caller -- confirm against hc_asprintf() and the call site.
 */
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
  char *build_opts = NULL;

  hc_asprintf (&build_opts, "-D NO_UNROLL");

  return build_opts;
}
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
u32 *digest = (u32 *) digest_buf;
@ -226,7 +206,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
@ -253,6 +233,6 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_st_hash = module_st_hash;
module_ctx->module_st_pass = module_st_pass;
module_ctx->module_tmp_size = MODULE_DEFAULT;
module_ctx->module_unstable_warning = module_unstable_warning;
module_ctx->module_unstable_warning = MODULE_DEFAULT;
module_ctx->module_warmup_disable = MODULE_DEFAULT;
}

Loading…
Cancel
Save