Mirror of https://github.com/hashcat/hashcat.git

Refactored inc_hash_scrypt.cl almost completely and improved the macro names in inc_hash_scrypt.h. Adapted all existing SCRYPT-based plugins to the new standard. If you have custom SCRYPT-based plugins, use hash-mode 8900 as a reference.

Fixed some compiler warnings in inc_platform.cl. Cleaned up the code paths in inc_vendor.h for finding values for HC_ATTR_SEQ and DECLSPEC.

Removed the option --device-as-default-execution-space from nvrtc for hiprtc compatibility; as a result, added __device__ back to DECLSPEC. Removed the option --restrict from the nvrtc compile options since we actually alias some buffers. Added --gpu-max-threads-per-block to the hiprtc options. Added -D MAX_THREADS_PER_BLOCK to the OpenCL options (currently unused).

Removed all OPTS_TYPE_MP_MULTI_DISABLE entries for SNMPv3-based plugins. These plugins consume large amounts of memory; for this reason, kernel_accel max was limited to 256. This may still be high, but hashcat will automatically tune down kernel_accel if insufficient memory is detected.

Removed the command `rocm-smi --resetprofile --resetclocks --resetfans` from benchmark_deep.pl, since some AMD GPUs become artificially slow for a while after running these commands.

Replaced load_source() with file_to_buffer() from shared.c, which performs the exact same operations. Moved suppress_stderr() and restore_stderr() to shared.c and reused them in both the Python bridges and opencl_test_instruction(), where the same type of code existed.
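For orientation, here is a rough sketch of the call sequence a SCRYPT-based plugin kernel drives through the helpers defined below. Everything except the scrypt_* functions and STATE_CNT4 is an illustrative placeholder (buffer names, STATE_SZ, the kernel parameters), not the plugin API; the hash-mode 8900 kernels are the authoritative reference.

u32 X[STATE_CNT4]; // hypothetical fragment; X, T and the *_buf names are placeholders
u32 T[STATE_CNT4];

scrypt_pbkdf2_gg (pw_buf, pw_len, salt_buf, salt_len, X, STATE_SZ); // B = PBKDF2 (P, S, 1)
scrypt_blockmix_in (X, STATE_SZ);                                   // into salsa_r word order
scrypt_smix_init (X, V0, V1, V2, V3, gid, lid, lsz, bid);           // fill the TMTO scratchpad

scrypt_smix_loop (X, T, V0, V1, V2, V3, gid, lid, lsz, bid);        // repeated across loop-kernel calls

scrypt_blockmix_out (X, STATE_SZ);                                  // back to linear word order
scrypt_pbkdf2_gp (pw_buf, pw_len, X, STATE_SZ, out_buf, out_len);   // DK = PBKDF2 (P, B, 1)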
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
#include "inc_common.h"
#include "inc_hash_scrypt.h"
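
// salsa_r: applies the Salsa20/8 core across all 2*r blocks of the scrypt
// state (the BlockMix step); TI holds STATE_CNT4 u32 words in salsa_r order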
DECLSPEC void salsa_r (PRIVATE_AS u32 *TI)
{
  u32 TT[STATE_CNT4 / 2]; // we actually need 16 here for SALSA, but we reuse that buffer for blockmix

  for (int j = 0; j < SALSA_CNT4; j++) TT[j] = TI[STATE_CNT4 - 16 + j];
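
  // mix the blocks in sequence: X_i = Salsa20/8 (X_{i-1} ^ B_i)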
  for (int i = 0; i < STATE_CNT4; i += SALSA_CNT4)
  {
    for (int j = 0; j < SALSA_CNT4; j++) TT[j] ^= TI[i + j];

    for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];

    for (int r = 0; r < 4; r++)
    {
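      // each iteration is one Salsa20 double round: the column round below,
      // then (after a shuffle) the same pattern reused as the row round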
      u32 t0, t1, t2, t3;

      t0 = TT[ 0] + TT[12];
      t1 = TT[ 1] + TT[13];
      t2 = TT[ 2] + TT[14];
      t3 = TT[ 3] + TT[15];
      TT[ 4] ^= hc_rotl32_S (t0, 7);
      TT[ 5] ^= hc_rotl32_S (t1, 7);
      TT[ 6] ^= hc_rotl32_S (t2, 7);
      TT[ 7] ^= hc_rotl32_S (t3, 7);

      t0 = TT[ 4] + TT[ 0];
      t1 = TT[ 5] + TT[ 1];
      t2 = TT[ 6] + TT[ 2];
      t3 = TT[ 7] + TT[ 3];
      TT[ 8] ^= hc_rotl32_S (t0, 9);
      TT[ 9] ^= hc_rotl32_S (t1, 9);
      TT[10] ^= hc_rotl32_S (t2, 9);
      TT[11] ^= hc_rotl32_S (t3, 9);

      t0 = TT[ 8] + TT[ 4];
      t1 = TT[ 9] + TT[ 5];
      t2 = TT[10] + TT[ 6];
      t3 = TT[11] + TT[ 7];
      TT[12] ^= hc_rotl32_S (t0, 13);
      TT[13] ^= hc_rotl32_S (t1, 13);
      TT[14] ^= hc_rotl32_S (t2, 13);
      TT[15] ^= hc_rotl32_S (t3, 13);

      t0 = TT[12] + TT[ 8];
      t1 = TT[13] + TT[ 9];
      t2 = TT[14] + TT[10];
      t3 = TT[15] + TT[11];
      TT[ 0] ^= hc_rotl32_S (t0, 18);
      TT[ 1] ^= hc_rotl32_S (t1, 18);
      TT[ 2] ^= hc_rotl32_S (t2, 18);
      TT[ 3] ^= hc_rotl32_S (t3, 18);
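
      // shuffle rows into column position so the row round can reuse the
      // quarter-round pattern above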
      t0 = TT[ 4]; TT[ 4] = TT[ 7]; TT[ 7] = TT[ 6]; TT[ 6] = TT[ 5]; TT[ 5] = t0;
      t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
      t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
      t0 = TT[12]; TT[12] = TT[13]; TT[13] = TT[14]; TT[14] = TT[15]; TT[15] = t0;

      t0 = TT[ 0] + TT[ 4];
      t1 = TT[ 1] + TT[ 5];
      t2 = TT[ 2] + TT[ 6];
      t3 = TT[ 3] + TT[ 7];
      TT[12] ^= hc_rotl32_S (t0, 7);
      TT[13] ^= hc_rotl32_S (t1, 7);
      TT[14] ^= hc_rotl32_S (t2, 7);
      TT[15] ^= hc_rotl32_S (t3, 7);

      t0 = TT[12] + TT[ 0];
      t1 = TT[13] + TT[ 1];
      t2 = TT[14] + TT[ 2];
      t3 = TT[15] + TT[ 3];
      TT[ 8] ^= hc_rotl32_S (t0, 9);
      TT[ 9] ^= hc_rotl32_S (t1, 9);
      TT[10] ^= hc_rotl32_S (t2, 9);
      TT[11] ^= hc_rotl32_S (t3, 9);

      t0 = TT[ 8] + TT[12];
      t1 = TT[ 9] + TT[13];
      t2 = TT[10] + TT[14];
      t3 = TT[11] + TT[15];
      TT[ 4] ^= hc_rotl32_S (t0, 13);
      TT[ 5] ^= hc_rotl32_S (t1, 13);
      TT[ 6] ^= hc_rotl32_S (t2, 13);
      TT[ 7] ^= hc_rotl32_S (t3, 13);

      t0 = TT[ 4] + TT[ 8];
      t1 = TT[ 5] + TT[ 9];
      t2 = TT[ 6] + TT[10];
      t3 = TT[ 7] + TT[11];
      TT[ 0] ^= hc_rotl32_S (t0, 18);
      TT[ 1] ^= hc_rotl32_S (t1, 18);
      TT[ 2] ^= hc_rotl32_S (t2, 18);
      TT[ 3] ^= hc_rotl32_S (t3, 18);
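
      // undo the shuffle, restoring the canonical word order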
      t0 = TT[ 4]; TT[ 4] = TT[ 5]; TT[ 5] = TT[ 6]; TT[ 6] = TT[ 7]; TT[ 7] = t0;
      t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
      t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
      t0 = TT[15]; TT[15] = TT[14]; TT[14] = TT[13]; TT[13] = TT[12]; TT[12] = t0;
    }

    for (int j = 0; j < SALSA_CNT4; j++) TT[j] += TI[i + j];

    for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];
  }
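
  // BlockMix interleave: even-indexed blocks to the front half, odd-indexed
  // blocks to the back half (compiled out for SCRYPT_R == 1)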
#if SCRYPT_R > 1

  for (int dst_off = 0, src_off = SALSA_CNT4; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
  {
    for (int j = 0; j < SALSA_CNT4; j++) TT[dst_off + j] = TI[src_off + j];
  }

  for (int dst_off = SALSA_CNT4, src_off = SALSA_CNT4 * 2; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
  {
    for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TI[src_off + j];
  }

  for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4)
  {
    for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TT[src_off + j];
  }

#endif
}
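
// scrypt_smix_init: first half of SMix; stores every 2^SCRYPT_TMTO-th state
// into the V scratchpad so the loop phase can trade recomputation for memory;
// V is striped across four buffers (V0..V3), selected by the block id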
DECLSPEC void scrypt_smix_init (PRIVATE_AS u32 *X, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
{
  const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
  const u32 zSIZE = STATE_CNT44;

  const u32 xd4 = bid / 4;
  const u32 xm4 = bid & 3;

  PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;

  GLOBAL_AS uint4 *V;

  switch (xm4)
  {
    case 0: V = (GLOBAL_AS uint4 *) V0; break;
    case 1: V = (GLOBAL_AS uint4 *) V1; break;
    case 2: V = (GLOBAL_AS uint4 *) V2; break;
    case 3: V = (GLOBAL_AS uint4 *) V3; break;
  }

  for (u32 y = 0; y < ySIZE; y++)
  {
    for (u32 z = 0; z < zSIZE; z++) V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)] = X4[z];

    for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r (X);
  }
}
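
// scrypt_smix_loop: second half of SMix; each iteration picks
// k = Integerify (X) mod N, reloads the nearest stored state from V and
// recomputes forward before mixing it into X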
DECLSPEC void scrypt_smix_loop (PRIVATE_AS u32 *X, PRIVATE_AS u32 *T, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
{
  const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
  const u32 zSIZE = STATE_CNT44;

  const u32 xd4 = bid / 4;
  const u32 xm4 = bid & 3;

  PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
  PRIVATE_AS uint4 *T4 = (PRIVATE_AS uint4 *) T;

  GLOBAL_AS uint4 *V;

  switch (xm4)
  {
    case 0: V = (GLOBAL_AS uint4 *) V0; break;
    case 1: V = (GLOBAL_AS uint4 *) V1; break;
    case 2: V = (GLOBAL_AS uint4 *) V2; break;
    case 3: V = (GLOBAL_AS uint4 *) V3; break;
  }

  // note: max 1024 iterations = forced -u 2048

  const u32 N_max = (SCRYPT_N < 2048) ? SCRYPT_N : 2048;

  for (u32 N_pos = 0; N_pos < N_max; N_pos++)
  {
    const u32 k = X4[zSIZE - 4].x & (SCRYPT_N - 1);
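
    // y: index of the stored state at or below k; km: number of salsa_r
    // steps needed to advance from V[y] to state k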
    const u32 y = k >> SCRYPT_TMTO;

    const u32 km = k - (y << SCRYPT_TMTO);

    for (u32 z = 0; z < zSIZE; z++) T4[z] = V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)];

    for (u32 i = 0; i < km; i++) salsa_r (T);

    for (u32 z = 0; z < zSIZE; z++) X4[z] = X4[z] ^ T4[z];

    salsa_r (X);
  }
}
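
// scrypt_blockmix_in: permutes each 64-byte Salsa block from linear word
// order into the diagonal order salsa_r operates on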
DECLSPEC void scrypt_blockmix_in (PRIVATE_AS u32 *out_buf, const int out_len)
{
  for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
  {
    u32 X[SALSA_CNT4];

    X[ 0] = out_buf[j +  0];
    X[ 1] = out_buf[j +  5];
    X[ 2] = out_buf[j + 10];
    X[ 3] = out_buf[j + 15];
    X[ 4] = out_buf[j +  4];
    X[ 5] = out_buf[j +  9];
    X[ 6] = out_buf[j + 14];
    X[ 7] = out_buf[j +  3];
    X[ 8] = out_buf[j +  8];
    X[ 9] = out_buf[j + 13];
    X[10] = out_buf[j +  2];
    X[11] = out_buf[j +  7];
    X[12] = out_buf[j + 12];
    X[13] = out_buf[j +  1];
    X[14] = out_buf[j +  6];
    X[15] = out_buf[j + 11];

    out_buf[j +  0] = X[ 0];
    out_buf[j +  1] = X[ 1];
    out_buf[j +  2] = X[ 2];
    out_buf[j +  3] = X[ 3];
    out_buf[j +  4] = X[ 4];
    out_buf[j +  5] = X[ 5];
    out_buf[j +  6] = X[ 6];
    out_buf[j +  7] = X[ 7];
    out_buf[j +  8] = X[ 8];
    out_buf[j +  9] = X[ 9];
    out_buf[j + 10] = X[10];
    out_buf[j + 11] = X[11];
    out_buf[j + 12] = X[12];
    out_buf[j + 13] = X[13];
    out_buf[j + 14] = X[14];
    out_buf[j + 15] = X[15];
  }
}
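
// scrypt_blockmix_out: the inverse permutation of scrypt_blockmix_in,
// restoring linear word order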
DECLSPEC void scrypt_blockmix_out (PRIVATE_AS u32 *out_buf, const int out_len)
{
  for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
  {
    u32 T[SALSA_CNT4];

    T[ 0] = out_buf[j +  0];
    T[ 1] = out_buf[j + 13];
    T[ 2] = out_buf[j + 10];
    T[ 3] = out_buf[j +  7];
    T[ 4] = out_buf[j +  4];
    T[ 5] = out_buf[j +  1];
    T[ 6] = out_buf[j + 14];
    T[ 7] = out_buf[j + 11];
    T[ 8] = out_buf[j +  8];
    T[ 9] = out_buf[j +  5];
    T[10] = out_buf[j +  2];
    T[11] = out_buf[j + 15];
    T[12] = out_buf[j + 12];
    T[13] = out_buf[j +  9];
    T[14] = out_buf[j +  6];
    T[15] = out_buf[j +  3];

    out_buf[j +  0] = T[ 0];
    out_buf[j +  1] = T[ 1];
    out_buf[j +  2] = T[ 2];
    out_buf[j +  3] = T[ 3];
    out_buf[j +  4] = T[ 4];
    out_buf[j +  5] = T[ 5];
    out_buf[j +  6] = T[ 6];
    out_buf[j +  7] = T[ 7];
    out_buf[j +  8] = T[ 8];
    out_buf[j +  9] = T[ 9];
    out_buf[j + 10] = T[10];
    out_buf[j + 11] = T[11];
    out_buf[j + 12] = T[12];
    out_buf[j + 13] = T[13];
    out_buf[j + 14] = T[14];
    out_buf[j + 15] = T[15];
  }
}
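
// scrypt_pbkdf2_body: PBKDF2-HMAC-SHA256 with iteration count 1, as scrypt
// specifies; emits 32 bytes (one SHA256 digest) per block counter j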
DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx, PRIVATE_AS u32 *out_buf, const int out_len)
{
  for (int i = 0, j = 1, k = 0; i < out_len; i += 32, j += 1, k += 8)
  {
    sha256_hmac_ctx_t sha256_hmac_ctx2 = *sha256_hmac_ctx;

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = j;
    w0[1] = 0;
    w0[2] = 0;
    w0[3] = 0;
    w1[0] = 0;
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4);

    sha256_hmac_final (&sha256_hmac_ctx2);

    // this will not work if the user specifies an output length that is not a
    // multiple of 4 -- probably never happens...
    // let's hope the compiler will auto-optimize this, since out_len is very
    // likely a constant at the caller level

    if (out_len >= (i +  4)) out_buf[k + 0] = hc_swap32_S (sha256_hmac_ctx2.opad.h[0]);
    if (out_len >= (i +  8)) out_buf[k + 1] = hc_swap32_S (sha256_hmac_ctx2.opad.h[1]);
    if (out_len >= (i + 12)) out_buf[k + 2] = hc_swap32_S (sha256_hmac_ctx2.opad.h[2]);
    if (out_len >= (i + 16)) out_buf[k + 3] = hc_swap32_S (sha256_hmac_ctx2.opad.h[3]);
    if (out_len >= (i + 20)) out_buf[k + 4] = hc_swap32_S (sha256_hmac_ctx2.opad.h[4]);
    if (out_len >= (i + 24)) out_buf[k + 5] = hc_swap32_S (sha256_hmac_ctx2.opad.h[5]);
    if (out_len >= (i + 28)) out_buf[k + 6] = hc_swap32_S (sha256_hmac_ctx2.opad.h[6]);
    if (out_len >= (i + 32)) out_buf[k + 7] = hc_swap32_S (sha256_hmac_ctx2.opad.h[7]);
  }
}
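
// scrypt_pbkdf2_{pp,pg,gp,gg}: convenience wrappers around scrypt_pbkdf2_body;
// the suffix encodes the address spaces of (password, salt): p = private, g = global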
DECLSPEC void scrypt_pbkdf2_pp (PRIVATE_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);

  sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}

DECLSPEC void scrypt_pbkdf2_pg (PRIVATE_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);

  sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_buf, salt_len);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}

DECLSPEC void scrypt_pbkdf2_gp (GLOBAL_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_global_swap (&sha256_hmac_ctx, pw_buf, pw_len);

  sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}

DECLSPEC void scrypt_pbkdf2_gg (GLOBAL_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_global_swap (&sha256_hmac_ctx, pw_buf, pw_len);

  sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_buf, salt_len);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}