mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-31 19:08:43 +00:00
Fix hc_bytealign_be() declaration for CUDA: use u32x instead of u32
Add hc_uint4_t to SCRYPT to work around an Intel OpenCL alignment bug. Manually align the large buffers (V0-V3) to 1024-byte boundaries. Replace the uint4 xor operator with the xor_uint4() function.
This commit is contained in:
parent
dd02267bf2
commit
77b5c65e52
@ -1887,7 +1887,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
DECLSPEC u32 hc_bytealign_be (const u32 a, const u32 b, const int c)
|
DECLSPEC u32 hc_bytealign_be (const u32x a, const u32x b, const int c)
|
||||||
{
|
{
|
||||||
const int c_mod_4 = c & 3;
|
const int c_mod_4 = c & 3;
|
||||||
|
|
||||||
|
@ -9,6 +9,18 @@
|
|||||||
#include "inc_common.h"
|
#include "inc_common.h"
|
||||||
#include "inc_hash_scrypt.h"
|
#include "inc_hash_scrypt.h"
|
||||||
|
|
||||||
|
DECLSPEC hc_uint4_t xor_uint4 (const hc_uint4_t a, const hc_uint4_t b)
|
||||||
|
{
|
||||||
|
hc_uint4_t r;
|
||||||
|
|
||||||
|
r.x = a.x ^ b.x;
|
||||||
|
r.y = a.y ^ b.y;
|
||||||
|
r.z = a.z ^ b.z;
|
||||||
|
r.w = a.w ^ b.w;
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
#if SCRYPT_R > 1
|
#if SCRYPT_R > 1
|
||||||
DECLSPEC void scrypt_shuffle (PRIVATE_AS u32 *TI)
|
DECLSPEC void scrypt_shuffle (PRIVATE_AS u32 *TI)
|
||||||
{
|
{
|
||||||
@ -144,27 +156,27 @@ DECLSPEC void scrypt_smix_init (GLOBAL_AS u32 *P, PRIVATE_AS u32 *X, GLOBAL_AS v
|
|||||||
const u32 xd4 = bid / 4;
|
const u32 xd4 = bid / 4;
|
||||||
const u32 xm4 = bid & 3;
|
const u32 xm4 = bid & 3;
|
||||||
|
|
||||||
PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
|
PRIVATE_AS hc_uint4_t *X4 = (PRIVATE_AS hc_uint4_t *) X;
|
||||||
|
|
||||||
GLOBAL_AS uint4 *V;
|
GLOBAL_AS hc_uint4_t *V;
|
||||||
|
|
||||||
switch (xm4)
|
switch (xm4)
|
||||||
{
|
{
|
||||||
case 0: V = (GLOBAL_AS uint4 *) V0; break;
|
case 0: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V0); break;
|
||||||
case 1: V = (GLOBAL_AS uint4 *) V1; break;
|
case 1: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V1); break;
|
||||||
case 2: V = (GLOBAL_AS uint4 *) V2; break;
|
case 2: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V2); break;
|
||||||
case 3: V = (GLOBAL_AS uint4 *) V3; break;
|
case 3: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V3); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL_AS uint4 *Vx = V + (xd4 * lsz * ySIZE * zSIZE) + (lid * ySIZE * zSIZE);
|
GLOBAL_AS hc_uint4_t *Vx = V + (xd4 * lsz * ySIZE * zSIZE) + (lid * ySIZE * zSIZE);
|
||||||
|
|
||||||
for (u32 i = 0; i < STATE_CNT4; i++) X[i] = P[i];
|
for (u32 i = 0; i < STATE_CNT4; i++) X[i] = P[i];
|
||||||
|
|
||||||
for (u32 y = 0; y < ySIZE; y++)
|
for (u32 y = 0; y < ySIZE; y++)
|
||||||
{
|
{
|
||||||
GLOBAL_AS uint4 *Vxx = Vx + (y * zSIZE);
|
GLOBAL_AS hc_uint4_t *Vxx = Vx + (y * zSIZE);
|
||||||
|
|
||||||
for (u32 z = 0; z < zSIZE; z++) *Vxx++ = X4[z];
|
for (u32 z = 0; z < zSIZE; z++) Vxx[z] = X4[z];
|
||||||
|
|
||||||
for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++)
|
for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++)
|
||||||
{
|
{
|
||||||
@ -187,20 +199,20 @@ DECLSPEC void scrypt_smix_loop (GLOBAL_AS u32 *P, PRIVATE_AS u32 *X, PRIVATE_AS
|
|||||||
const u32 xd4 = bid / 4;
|
const u32 xd4 = bid / 4;
|
||||||
const u32 xm4 = bid & 3;
|
const u32 xm4 = bid & 3;
|
||||||
|
|
||||||
PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
|
PRIVATE_AS hc_uint4_t *X4 = (PRIVATE_AS hc_uint4_t *) X;
|
||||||
PRIVATE_AS uint4 *T4 = (PRIVATE_AS uint4 *) T;
|
PRIVATE_AS hc_uint4_t *T4 = (PRIVATE_AS hc_uint4_t *) T;
|
||||||
|
|
||||||
GLOBAL_AS uint4 *V;
|
GLOBAL_AS hc_uint4_t *V;
|
||||||
|
|
||||||
switch (xm4)
|
switch (xm4)
|
||||||
{
|
{
|
||||||
case 0: V = (GLOBAL_AS uint4 *) V0; break;
|
case 0: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V0); break;
|
||||||
case 1: V = (GLOBAL_AS uint4 *) V1; break;
|
case 1: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V1); break;
|
||||||
case 2: V = (GLOBAL_AS uint4 *) V2; break;
|
case 2: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V2); break;
|
||||||
case 3: V = (GLOBAL_AS uint4 *) V3; break;
|
case 3: V = (GLOBAL_AS hc_uint4_t *) ALIGN_PTR_1k (V3); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL_AS uint4 *Vx = V + (xd4 * lsz * ySIZE * zSIZE) + (lid * ySIZE * zSIZE);
|
GLOBAL_AS hc_uint4_t *Vx = V + (xd4 * lsz * ySIZE * zSIZE) + (lid * ySIZE * zSIZE);
|
||||||
|
|
||||||
for (u32 i = 0; i < STATE_CNT4; i++) X[i] = P[i];
|
for (u32 i = 0; i < STATE_CNT4; i++) X[i] = P[i];
|
||||||
|
|
||||||
@ -216,7 +228,7 @@ DECLSPEC void scrypt_smix_loop (GLOBAL_AS u32 *P, PRIVATE_AS u32 *X, PRIVATE_AS
|
|||||||
|
|
||||||
const u32 km = k - (y << SCRYPT_TMTO);
|
const u32 km = k - (y << SCRYPT_TMTO);
|
||||||
|
|
||||||
GLOBAL_AS uint4 *Vxx = Vx + (y * zSIZE);
|
GLOBAL_AS hc_uint4_t *Vxx = Vx + (y * zSIZE);
|
||||||
|
|
||||||
for (u32 z = 0; z < zSIZE; z++) T4[z] = *Vxx++;
|
for (u32 z = 0; z < zSIZE; z++) T4[z] = *Vxx++;
|
||||||
|
|
||||||
@ -229,7 +241,7 @@ DECLSPEC void scrypt_smix_loop (GLOBAL_AS u32 *P, PRIVATE_AS u32 *X, PRIVATE_AS
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 z = 0; z < zSIZE; z++) X4[z] = X4[z] ^ T4[z];
|
for (u32 z = 0; z < zSIZE; z++) X4[z] = xor_uint4 (X4[z], T4[z]);
|
||||||
|
|
||||||
salsa_r (X);
|
salsa_r (X);
|
||||||
|
|
||||||
|
@ -25,24 +25,27 @@
|
|||||||
#define SALSA_CNT4 (SALSA_SZ / 4)
|
#define SALSA_CNT4 (SALSA_SZ / 4)
|
||||||
#define SALSA_CNT44 ((SALSA_SZ / 4) / 4)
|
#define SALSA_CNT44 ((SALSA_SZ / 4) / 4)
|
||||||
|
|
||||||
//#define VIDX(bid4,lsz,lid,ySIZE,zSIZE,y,z) (((bid4) * (lsz) * (ySIZE) * (zSIZE)) + ((lid) * (ySIZE) * (zSIZE)) + ((y) * (zSIZE)) + (z))
|
// should be safe, because in backend.c we use:
|
||||||
|
// u64 size_extra_buffer1 = 4096;
|
||||||
|
// size_extra_buffer1 += base_chunk_size;
|
||||||
|
// Round pointer p up to the next 1024-byte boundary (unchanged if already aligned).
// The mask is built from u64 on purpose: a `~1023UL` mask is only 32 bits wide on
// targets where `unsigned long` is 32-bit, and would then clear the upper half of
// the address after being zero-extended for the `&`.
#define ALIGN_PTR_1k(p) ((GLOBAL_AS hc_uint4_t *) (((u64) (p) + 1023) & ~((u64) 1023)))
|
||||||
|
|
||||||
#if defined IS_CUDA
|
#if defined IS_INTEL_SDK
|
||||||
|
|
||||||
DECLSPEC uint4 operator ^ (const uint4 a, const uint4 b)
|
typedef struct
|
||||||
{
|
{
|
||||||
uint4 r;
|
u32 x, y, z, w;
|
||||||
|
|
||||||
r.x = a.x ^ b.x;
|
} hc_uint4_t;
|
||||||
r.y = a.y ^ b.y;
|
|
||||||
r.z = a.z ^ b.z;
|
|
||||||
r.w = a.w ^ b.w;
|
|
||||||
|
|
||||||
return r;
|
#else
|
||||||
}
|
|
||||||
|
typedef uint4 hc_uint4_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
DECLSPEC hc_uint4_t xor_uint4 (const hc_uint4_t a, const hc_uint4_t b);
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
#ifndef SCRYPT_TMP_ELEM
|
#ifndef SCRYPT_TMP_ELEM
|
||||||
|
Loading…
Reference in New Issue
Block a user