1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-30 10:28:41 +00:00

Removed shared-memory based optimization for SCRYPT on HIP, because the shared-memory buffer is incompatible with TMTO, which limited SCRYPT-R to a maximum of 8. This change also simplifies the code, allowing removal of large sections of duplicated code. Removed the section in scrypt_module_extra_tuningdb_block() that increased TMTO when there was insufficient shared memory, as this is no longer applicable.

Refactored inc_hash_scrypt.cl almost completely and improved macro names in inc_hash_scrypt.h. Adapted all existing SCRYPT-based plugins to the new standard. If you have custom SCRYPT based plugins use hash-mode 8900 as reference.
Fixed some compiler warnings in inc_platform.cl.
Cleaned up code paths in inc_vendor.h for finding values for HC_ATTR_SEQ and DECLSPEC.
Removed option --device-as-default-execution-space from nvrtc for hiprtc compatibility. As a result, added __device__ back to DECLSPEC.
Removed option --restrict from nvrtc compile options since we actually alias some buffers.
Added --gpu-max-threads-per-block to hiprtc options.
Added -D MAX_THREADS_PER_BLOCK to OpenCL options (currently unused).
Removed all OPTS_TYPE_MP_MULTI_DISABLE entries for SNMPv3-based plugins.
These plugins consume large amounts of memory, and for this reason kernel_accel max is limited to 256. This may still be high, but hashcat will automatically tune down kernel_accel if insufficient memory is detected.
Removed command `rocm-smi --resetprofile --resetclocks --resetfans` from benchmark_deep.pl, since some AMD GPUs become artificially slow for a while after running these commands.
Replaced load_source() with file_to_buffer() from shared.c, which does the exact same operations.
Moved suppress_stderr() and restore_stderr() to shared.c and reused them in both Python bridges and opencl_test_instruction(), where the same type of code existed.
This commit is contained in:
Jens Steube 2025-06-21 07:09:20 +02:00
parent f399c97db0
commit b7c8fcf27c
25 changed files with 702 additions and 984 deletions

View File

@ -9,441 +9,288 @@
#include "inc_common.h"
#include "inc_hash_scrypt.h"
DECLSPEC void salsa_r_l (LOCAL_AS u32 *TI)
DECLSPEC void salsa_r (PRIVATE_AS u32 *TI)
{
u32 x[16];
u32 TT[STATE_CNT4/2]; // we actually nned 16 here for SALSA, but we reuse that buffer for blockmix
for (int j = 0; j < 16; j++) x[j] = TI[STATE_CNT - 16 + j];
for (int j = 0; j < SALSA_CNT4; j++) TT[j] = TI[STATE_CNT4 - 16 + j];
for (int i = 0; i < STATE_CNT; i += 16)
for (int i = 0; i < STATE_CNT4; i += SALSA_CNT4)
{
for (int j = 0; j < 16; j++)
{
x[j] ^= TI[i + j];
}
for (int j = 0; j < SALSA_CNT4; j++) TT[j] ^= TI[i + j];
for (int j = 0; j < 16; j++)
{
TI[i + j] = x[j];
}
for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];
for (int r = 0; r < 4; r++)
{
u32 t0, t1, t2, t3;
t0 = x[ 0] + x[12];
t1 = x[ 1] + x[13];
t2 = x[ 2] + x[14];
t3 = x[ 3] + x[15];
x[ 4] ^= hc_rotl32_S (t0, 7);
x[ 5] ^= hc_rotl32_S (t1, 7);
x[ 6] ^= hc_rotl32_S (t2, 7);
x[ 7] ^= hc_rotl32_S (t3, 7);
t0 = TT[ 0] + TT[12];
t1 = TT[ 1] + TT[13];
t2 = TT[ 2] + TT[14];
t3 = TT[ 3] + TT[15];
TT[ 4] ^= hc_rotl32_S (t0, 7);
TT[ 5] ^= hc_rotl32_S (t1, 7);
TT[ 6] ^= hc_rotl32_S (t2, 7);
TT[ 7] ^= hc_rotl32_S (t3, 7);
t0 = x[ 4] + x[ 0];
t1 = x[ 5] + x[ 1];
t2 = x[ 6] + x[ 2];
t3 = x[ 7] + x[ 3];
x[ 8] ^= hc_rotl32_S (t0, 9);
x[ 9] ^= hc_rotl32_S (t1, 9);
x[10] ^= hc_rotl32_S (t2, 9);
x[11] ^= hc_rotl32_S (t3, 9);
t0 = TT[ 4] + TT[ 0];
t1 = TT[ 5] + TT[ 1];
t2 = TT[ 6] + TT[ 2];
t3 = TT[ 7] + TT[ 3];
TT[ 8] ^= hc_rotl32_S (t0, 9);
TT[ 9] ^= hc_rotl32_S (t1, 9);
TT[10] ^= hc_rotl32_S (t2, 9);
TT[11] ^= hc_rotl32_S (t3, 9);
t0 = x[ 8] + x[ 4];
t1 = x[ 9] + x[ 5];
t2 = x[10] + x[ 6];
t3 = x[11] + x[ 7];
x[12] ^= hc_rotl32_S (t0, 13);
x[13] ^= hc_rotl32_S (t1, 13);
x[14] ^= hc_rotl32_S (t2, 13);
x[15] ^= hc_rotl32_S (t3, 13);
t0 = TT[ 8] + TT[ 4];
t1 = TT[ 9] + TT[ 5];
t2 = TT[10] + TT[ 6];
t3 = TT[11] + TT[ 7];
TT[12] ^= hc_rotl32_S (t0, 13);
TT[13] ^= hc_rotl32_S (t1, 13);
TT[14] ^= hc_rotl32_S (t2, 13);
TT[15] ^= hc_rotl32_S (t3, 13);
t0 = x[12] + x[ 8];
t1 = x[13] + x[ 9];
t2 = x[14] + x[10];
t3 = x[15] + x[11];
x[ 0] ^= hc_rotl32_S (t0, 18);
x[ 1] ^= hc_rotl32_S (t1, 18);
x[ 2] ^= hc_rotl32_S (t2, 18);
x[ 3] ^= hc_rotl32_S (t3, 18);
t0 = TT[12] + TT[ 8];
t1 = TT[13] + TT[ 9];
t2 = TT[14] + TT[10];
t3 = TT[15] + TT[11];
TT[ 0] ^= hc_rotl32_S (t0, 18);
TT[ 1] ^= hc_rotl32_S (t1, 18);
TT[ 2] ^= hc_rotl32_S (t2, 18);
TT[ 3] ^= hc_rotl32_S (t3, 18);
t0 = x[ 4]; x[ 4] = x[ 7]; x[ 7] = x[ 6]; x[ 6] = x[ 5]; x[ 5] = t0;
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
t0 = x[12]; x[12] = x[13]; x[13] = x[14]; x[14] = x[15]; x[15] = t0;
t0 = TT[ 4]; TT[ 4] = TT[ 7]; TT[ 7] = TT[ 6]; TT[ 6] = TT[ 5]; TT[ 5] = t0;
t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
t0 = TT[12]; TT[12] = TT[13]; TT[13] = TT[14]; TT[14] = TT[15]; TT[15] = t0;
t0 = x[ 0] + x[ 4];
t1 = x[ 1] + x[ 5];
t2 = x[ 2] + x[ 6];
t3 = x[ 3] + x[ 7];
x[12] ^= hc_rotl32_S (t0, 7);
x[13] ^= hc_rotl32_S (t1, 7);
x[14] ^= hc_rotl32_S (t2, 7);
x[15] ^= hc_rotl32_S (t3, 7);
t0 = TT[ 0] + TT[ 4];
t1 = TT[ 1] + TT[ 5];
t2 = TT[ 2] + TT[ 6];
t3 = TT[ 3] + TT[ 7];
TT[12] ^= hc_rotl32_S (t0, 7);
TT[13] ^= hc_rotl32_S (t1, 7);
TT[14] ^= hc_rotl32_S (t2, 7);
TT[15] ^= hc_rotl32_S (t3, 7);
t0 = x[12] + x[ 0];
t1 = x[13] + x[ 1];
t2 = x[14] + x[ 2];
t3 = x[15] + x[ 3];
x[ 8] ^= hc_rotl32_S (t0, 9);
x[ 9] ^= hc_rotl32_S (t1, 9);
x[10] ^= hc_rotl32_S (t2, 9);
x[11] ^= hc_rotl32_S (t3, 9);
t0 = TT[12] + TT[ 0];
t1 = TT[13] + TT[ 1];
t2 = TT[14] + TT[ 2];
t3 = TT[15] + TT[ 3];
TT[ 8] ^= hc_rotl32_S (t0, 9);
TT[ 9] ^= hc_rotl32_S (t1, 9);
TT[10] ^= hc_rotl32_S (t2, 9);
TT[11] ^= hc_rotl32_S (t3, 9);
t0 = x[ 8] + x[12];
t1 = x[ 9] + x[13];
t2 = x[10] + x[14];
t3 = x[11] + x[15];
x[ 4] ^= hc_rotl32_S (t0, 13);
x[ 5] ^= hc_rotl32_S (t1, 13);
x[ 6] ^= hc_rotl32_S (t2, 13);
x[ 7] ^= hc_rotl32_S (t3, 13);
t0 = TT[ 8] + TT[12];
t1 = TT[ 9] + TT[13];
t2 = TT[10] + TT[14];
t3 = TT[11] + TT[15];
TT[ 4] ^= hc_rotl32_S (t0, 13);
TT[ 5] ^= hc_rotl32_S (t1, 13);
TT[ 6] ^= hc_rotl32_S (t2, 13);
TT[ 7] ^= hc_rotl32_S (t3, 13);
t0 = x[ 4] + x[ 8];
t1 = x[ 5] + x[ 9];
t2 = x[ 6] + x[10];
t3 = x[ 7] + x[11];
x[ 0] ^= hc_rotl32_S (t0, 18);
x[ 1] ^= hc_rotl32_S (t1, 18);
x[ 2] ^= hc_rotl32_S (t2, 18);
x[ 3] ^= hc_rotl32_S (t3, 18);
t0 = TT[ 4] + TT[ 8];
t1 = TT[ 5] + TT[ 9];
t2 = TT[ 6] + TT[10];
t3 = TT[ 7] + TT[11];
TT[ 0] ^= hc_rotl32_S (t0, 18);
TT[ 1] ^= hc_rotl32_S (t1, 18);
TT[ 2] ^= hc_rotl32_S (t2, 18);
TT[ 3] ^= hc_rotl32_S (t3, 18);
t0 = x[ 4]; x[ 4] = x[ 5]; x[ 5] = x[ 6]; x[ 6] = x[ 7]; x[ 7] = t0;
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
t0 = x[15]; x[15] = x[14]; x[14] = x[13]; x[13] = x[12]; x[12] = t0;
t0 = TT[ 4]; TT[ 4] = TT[ 5]; TT[ 5] = TT[ 6]; TT[ 6] = TT[ 7]; TT[ 7] = t0;
t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
t0 = TT[15]; TT[15] = TT[14]; TT[14] = TT[13]; TT[13] = TT[12]; TT[12] = t0;
}
for (int j = 0; j < 16; j++)
{
x[j] += TI[i + j];
}
for (int j = 0; j < SALSA_CNT4; j++) TT[j] += TI[i + j];
for (int j = 0; j < 16; j++)
{
TI[i + j] = x[j];
}
for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];
}
#if SCRYPT_R > 1
u32 TT[STATE_CNT / 2];
for (int dst_off = 0, src_off = 16; src_off < STATE_CNT; dst_off += 16, src_off += 32)
for (int dst_off = 0, src_off = SALSA_CNT4; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
{
for (int j = 0; j < 16; j++) TT[dst_off + j] = TI[src_off + j];
for (int j = 0; j < SALSA_CNT4; j++) TT[dst_off + j] = TI[src_off + j];
}
for (int dst_off = 16, src_off = 32; src_off < STATE_CNT; dst_off += 16, src_off += 32)
for (int dst_off = SALSA_CNT4, src_off = SALSA_CNT4 * 2; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
{
for (int j = 0; j < 16; j++) TI[dst_off + j] = TI[src_off + j];
for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TI[src_off + j];
}
for (int dst_off = STATE_CNT / 2, src_off = 0; dst_off < STATE_CNT; dst_off += 16, src_off += 16)
for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4)
{
for (int j = 0; j < 16; j++) TI[dst_off + j] = TT[src_off + j];
for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TT[src_off + j];
}
#endif
}
DECLSPEC void salsa_r_p (PRIVATE_AS u32 *TI)
{
u32 x[16];
for (int j = 0; j < 16; j++) x[j] = TI[STATE_CNT - 16 + j];
for (int i = 0; i < STATE_CNT; i += 16)
{
for (int j = 0; j < 16; j++)
{
x[j] ^= TI[i + j];
}
for (int j = 0; j < 16; j++)
{
TI[i + j] = x[j];
}
for (int r = 0; r < 4; r++)
{
u32 t0, t1, t2, t3;
t0 = x[ 0] + x[12];
t1 = x[ 1] + x[13];
t2 = x[ 2] + x[14];
t3 = x[ 3] + x[15];
x[ 4] ^= hc_rotl32_S (t0, 7);
x[ 5] ^= hc_rotl32_S (t1, 7);
x[ 6] ^= hc_rotl32_S (t2, 7);
x[ 7] ^= hc_rotl32_S (t3, 7);
t0 = x[ 4] + x[ 0];
t1 = x[ 5] + x[ 1];
t2 = x[ 6] + x[ 2];
t3 = x[ 7] + x[ 3];
x[ 8] ^= hc_rotl32_S (t0, 9);
x[ 9] ^= hc_rotl32_S (t1, 9);
x[10] ^= hc_rotl32_S (t2, 9);
x[11] ^= hc_rotl32_S (t3, 9);
t0 = x[ 8] + x[ 4];
t1 = x[ 9] + x[ 5];
t2 = x[10] + x[ 6];
t3 = x[11] + x[ 7];
x[12] ^= hc_rotl32_S (t0, 13);
x[13] ^= hc_rotl32_S (t1, 13);
x[14] ^= hc_rotl32_S (t2, 13);
x[15] ^= hc_rotl32_S (t3, 13);
t0 = x[12] + x[ 8];
t1 = x[13] + x[ 9];
t2 = x[14] + x[10];
t3 = x[15] + x[11];
x[ 0] ^= hc_rotl32_S (t0, 18);
x[ 1] ^= hc_rotl32_S (t1, 18);
x[ 2] ^= hc_rotl32_S (t2, 18);
x[ 3] ^= hc_rotl32_S (t3, 18);
t0 = x[ 4]; x[ 4] = x[ 7]; x[ 7] = x[ 6]; x[ 6] = x[ 5]; x[ 5] = t0;
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
t0 = x[12]; x[12] = x[13]; x[13] = x[14]; x[14] = x[15]; x[15] = t0;
t0 = x[ 0] + x[ 4];
t1 = x[ 1] + x[ 5];
t2 = x[ 2] + x[ 6];
t3 = x[ 3] + x[ 7];
x[12] ^= hc_rotl32_S (t0, 7);
x[13] ^= hc_rotl32_S (t1, 7);
x[14] ^= hc_rotl32_S (t2, 7);
x[15] ^= hc_rotl32_S (t3, 7);
t0 = x[12] + x[ 0];
t1 = x[13] + x[ 1];
t2 = x[14] + x[ 2];
t3 = x[15] + x[ 3];
x[ 8] ^= hc_rotl32_S (t0, 9);
x[ 9] ^= hc_rotl32_S (t1, 9);
x[10] ^= hc_rotl32_S (t2, 9);
x[11] ^= hc_rotl32_S (t3, 9);
t0 = x[ 8] + x[12];
t1 = x[ 9] + x[13];
t2 = x[10] + x[14];
t3 = x[11] + x[15];
x[ 4] ^= hc_rotl32_S (t0, 13);
x[ 5] ^= hc_rotl32_S (t1, 13);
x[ 6] ^= hc_rotl32_S (t2, 13);
x[ 7] ^= hc_rotl32_S (t3, 13);
t0 = x[ 4] + x[ 8];
t1 = x[ 5] + x[ 9];
t2 = x[ 6] + x[10];
t3 = x[ 7] + x[11];
x[ 0] ^= hc_rotl32_S (t0, 18);
x[ 1] ^= hc_rotl32_S (t1, 18);
x[ 2] ^= hc_rotl32_S (t2, 18);
x[ 3] ^= hc_rotl32_S (t3, 18);
t0 = x[ 4]; x[ 4] = x[ 5]; x[ 5] = x[ 6]; x[ 6] = x[ 7]; x[ 7] = t0;
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
t0 = x[15]; x[15] = x[14]; x[14] = x[13]; x[13] = x[12]; x[12] = t0;
}
for (int j = 0; j < 16; j++)
{
x[j] += TI[i + j];
}
for (int j = 0; j < 16; j++)
{
TI[i + j] = x[j];
}
}
#if SCRYPT_R > 1
u32 TT[STATE_CNT / 2];
for (int dst_off = 0, src_off = 16; src_off < STATE_CNT; dst_off += 16, src_off += 32)
{
for (int j = 0; j < 16; j++) TT[dst_off + j] = TI[src_off + j];
}
for (int dst_off = 16, src_off = 32; src_off < STATE_CNT; dst_off += 16, src_off += 32)
{
for (int j = 0; j < 16; j++) TI[dst_off + j] = TI[src_off + j];
}
for (int dst_off = STATE_CNT / 2, src_off = 0; dst_off < STATE_CNT; dst_off += 16, src_off += 16)
{
for (int j = 0; j < 16; j++) TI[dst_off + j] = TT[src_off + j];
}
#endif
}
#ifdef IS_HIP
DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#else
DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#endif
DECLSPEC void scrypt_smix_init (PRIVATE_AS u32 *X, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
{
const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;
const u32 zSIZE = STATE_CNT44;
const u32 xd4 = bid / 4;
const u32 xm4 = bid & 3;
PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
GLOBAL_AS uint4 *V;
switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
case 0: V = (GLOBAL_AS uint4 *) V0; break;
case 1: V = (GLOBAL_AS uint4 *) V1; break;
case 2: V = (GLOBAL_AS uint4 *) V2; break;
case 3: V = (GLOBAL_AS uint4 *) V3; break;
}
for (u32 y = 0; y < ySIZE; y++)
{
for (u32 z = 0; z < zSIZE; z++) V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)] = X[z];
for (u32 z = 0; z < zSIZE; z++) V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)] = X4[z];
#ifdef IS_HIP
for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r_l ((LOCAL_AS u32 *) X);
#else
for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r_p ((PRIVATE_AS u32 *) X);
#endif
for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r (X);
}
}
#ifdef IS_HIP
DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#else
DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#endif
DECLSPEC void scrypt_smix_loop (PRIVATE_AS u32 *X, PRIVATE_AS u32 *T, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
{
const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;
const u32 zSIZE = STATE_CNT44;
const u32 xd4 = bid / 4;
const u32 xm4 = bid & 3;
PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
PRIVATE_AS uint4 *T4 = (PRIVATE_AS uint4 *) T;
GLOBAL_AS uint4 *V;
switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
case 0: V = (GLOBAL_AS uint4 *) V0; break;
case 1: V = (GLOBAL_AS uint4 *) V1; break;
case 2: V = (GLOBAL_AS uint4 *) V2; break;
case 3: V = (GLOBAL_AS uint4 *) V3; break;
}
// note: max 2048 iterations = forced -u 2048
// note: max 1024 iterations = forced -u 2048
const u32 N_max = (SCRYPT_N < 2048) ? SCRYPT_N : 2048;
for (u32 N_pos = 0; N_pos < N_max; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
const u32 k = X4[zSIZE - 4].x & (SCRYPT_N - 1);
const u32 y = k >> SCRYPT_TMTO;
const u32 km = k - (y << SCRYPT_TMTO);
for (u32 z = 0; z < zSIZE; z++) T[z] = V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)];
for (u32 z = 0; z < zSIZE; z++) T4[z] = V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)];
#ifdef IS_HIP
for (u32 i = 0; i < km; i++) salsa_r_l ((LOCAL_AS u32 *) T);
#else
for (u32 i = 0; i < km; i++) salsa_r_p ((PRIVATE_AS u32 *) T);
#endif
for (u32 i = 0; i < km; i++) salsa_r (T);
for (u32 z = 0; z < zSIZE; z++) X[z] ^= T[z];
for (u32 z = 0; z < zSIZE; z++) X4[z] = X4[z] ^ T4[z];
salsa_r_p ((PRIVATE_AS u32 *) X);
salsa_r (X);
}
}
DECLSPEC void scrypt_blockmix_in (GLOBAL_AS uint4 *out_buf, const int out_len)
DECLSPEC void scrypt_blockmix_in (PRIVATE_AS u32 *out_buf, const int out_len)
{
for (int i = 0, j = 0; i < out_len; i += 64, j += 4)
for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
{
uint4 T[4];
u32 X[SALSA_CNT4];
T[0] = out_buf[j + 0];
T[1] = out_buf[j + 1];
T[2] = out_buf[j + 2];
T[3] = out_buf[j + 3];
X[ 0] = out_buf[j + 0];
X[ 1] = out_buf[j + 5];
X[ 2] = out_buf[j + 10];
X[ 3] = out_buf[j + 15];
X[ 4] = out_buf[j + 4];
X[ 5] = out_buf[j + 9];
X[ 6] = out_buf[j + 14];
X[ 7] = out_buf[j + 3];
X[ 8] = out_buf[j + 8];
X[ 9] = out_buf[j + 13];
X[10] = out_buf[j + 2];
X[11] = out_buf[j + 7];
X[12] = out_buf[j + 12];
X[13] = out_buf[j + 1];
X[14] = out_buf[j + 6];
X[15] = out_buf[j + 11];
uint4 X[4];
#if defined IS_CUDA || defined IS_HIP
X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w);
#endif
out_buf[j + 0] = X[0];
out_buf[j + 1] = X[1];
out_buf[j + 2] = X[2];
out_buf[j + 3] = X[3];
out_buf[j + 0] = X[ 0];
out_buf[j + 1] = X[ 1];
out_buf[j + 2] = X[ 2];
out_buf[j + 3] = X[ 3];
out_buf[j + 4] = X[ 4];
out_buf[j + 5] = X[ 5];
out_buf[j + 6] = X[ 6];
out_buf[j + 7] = X[ 7];
out_buf[j + 8] = X[ 8];
out_buf[j + 9] = X[ 9];
out_buf[j + 10] = X[10];
out_buf[j + 11] = X[11];
out_buf[j + 12] = X[12];
out_buf[j + 13] = X[13];
out_buf[j + 14] = X[14];
out_buf[j + 15] = X[15];
}
}
DECLSPEC void scrypt_blockmix_out (GLOBAL_AS uint4 *out_buf, const int out_len)
DECLSPEC void scrypt_blockmix_out (PRIVATE_AS u32 *out_buf, const int out_len)
{
for (int i = 0, j = 0; i < out_len; i += 64, j += 4)
for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
{
uint4 X[4];
u32 T[SALSA_CNT4];
X[0] = out_buf[j + 0];
X[1] = out_buf[j + 1];
X[2] = out_buf[j + 2];
X[3] = out_buf[j + 3];
T[ 0] = out_buf[j + 0];
T[ 1] = out_buf[j + 13];
T[ 2] = out_buf[j + 10];
T[ 3] = out_buf[j + 7];
T[ 4] = out_buf[j + 4];
T[ 5] = out_buf[j + 1];
T[ 6] = out_buf[j + 14];
T[ 7] = out_buf[j + 11];
T[ 8] = out_buf[j + 8];
T[ 9] = out_buf[j + 5];
T[10] = out_buf[j + 2];
T[11] = out_buf[j + 15];
T[12] = out_buf[j + 12];
T[13] = out_buf[j + 9];
T[14] = out_buf[j + 6];
T[15] = out_buf[j + 3];
uint4 T[4];
#if defined IS_CUDA || defined IS_HIP
T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w);
#endif
out_buf[j + 0] = T[0];
out_buf[j + 1] = T[1];
out_buf[j + 2] = T[2];
out_buf[j + 3] = T[3];
out_buf[j + 0] = T[ 0];
out_buf[j + 1] = T[ 1];
out_buf[j + 2] = T[ 2];
out_buf[j + 3] = T[ 3];
out_buf[j + 4] = T[ 4];
out_buf[j + 5] = T[ 5];
out_buf[j + 6] = T[ 6];
out_buf[j + 7] = T[ 7];
out_buf[j + 8] = T[ 8];
out_buf[j + 9] = T[ 9];
out_buf[j + 10] = T[10];
out_buf[j + 11] = T[11];
out_buf[j + 12] = T[12];
out_buf[j + 13] = T[13];
out_buf[j + 14] = T[14];
out_buf[j + 15] = T[15];
}
}
DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx, GLOBAL_AS uint4 *out_buf, const int out_len)
DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx, PRIVATE_AS u32 *out_buf, const int out_len)
{
for (int i = 0, j = 1, k = 0; i < out_len; i += 32, j += 1, k += 2)
for (int i = 0, j = 1, k = 0; i < out_len; i += 32, j += 1, k += 8)
{
sha256_hmac_ctx_t sha256_hmac_ctx2 = *sha256_hmac_ctx;
@ -473,34 +320,56 @@ DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx,
sha256_hmac_final (&sha256_hmac_ctx2);
u32 digest[8];
// this will not work if user specifies output length not a multiple of 4
// probably never happens...
// let's hope the compiler will auto optimize this since out_len is very likely
// a constant at caller level
digest[0] = hc_swap32_S (sha256_hmac_ctx2.opad.h[0]);
digest[1] = hc_swap32_S (sha256_hmac_ctx2.opad.h[1]);
digest[2] = hc_swap32_S (sha256_hmac_ctx2.opad.h[2]);
digest[3] = hc_swap32_S (sha256_hmac_ctx2.opad.h[3]);
digest[4] = hc_swap32_S (sha256_hmac_ctx2.opad.h[4]);
digest[5] = hc_swap32_S (sha256_hmac_ctx2.opad.h[5]);
digest[6] = hc_swap32_S (sha256_hmac_ctx2.opad.h[6]);
digest[7] = hc_swap32_S (sha256_hmac_ctx2.opad.h[7]);
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
#endif
out_buf[k + 0] = tmp0;
out_buf[k + 1] = tmp1;
if (out_len >= (i + 4)) out_buf[k + 0] = hc_swap32_S (sha256_hmac_ctx2.opad.h[0]);
if (out_len >= (i + 8)) out_buf[k + 1] = hc_swap32_S (sha256_hmac_ctx2.opad.h[1]);
if (out_len >= (i + 12)) out_buf[k + 2] = hc_swap32_S (sha256_hmac_ctx2.opad.h[2]);
if (out_len >= (i + 16)) out_buf[k + 3] = hc_swap32_S (sha256_hmac_ctx2.opad.h[3]);
if (out_len >= (i + 20)) out_buf[k + 4] = hc_swap32_S (sha256_hmac_ctx2.opad.h[4]);
if (out_len >= (i + 24)) out_buf[k + 5] = hc_swap32_S (sha256_hmac_ctx2.opad.h[5]);
if (out_len >= (i + 28)) out_buf[k + 6] = hc_swap32_S (sha256_hmac_ctx2.opad.h[6]);
if (out_len >= (i + 32)) out_buf[k + 7] = hc_swap32_S (sha256_hmac_ctx2.opad.h[7]);
}
}
DECLSPEC void scrypt_pbkdf2 (GLOBAL_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, GLOBAL_AS uint4 *out_buf, const int out_len)
DECLSPEC void scrypt_pbkdf2_pp (PRIVATE_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
sha256_hmac_ctx_t sha256_hmac_ctx;
sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);
sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);
scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}
DECLSPEC void scrypt_pbkdf2_pg (PRIVATE_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
sha256_hmac_ctx_t sha256_hmac_ctx;
sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);
sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_buf, salt_len);
scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}
DECLSPEC void scrypt_pbkdf2_gp (GLOBAL_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
sha256_hmac_ctx_t sha256_hmac_ctx;
sha256_hmac_init_global_swap (&sha256_hmac_ctx, pw_buf, pw_len);
sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);
scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
}
DECLSPEC void scrypt_pbkdf2_gg (GLOBAL_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
{
sha256_hmac_ctx_t sha256_hmac_ctx;

View File

@ -6,39 +6,40 @@
#ifndef INC_HASH_SCRYPT_H
#define INC_HASH_SCRYPT_H
#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p))
#define GET_SMIX_CNT(r,N) (2 * (r) * 16 * (N))
#define GET_STATE_CNT(r) (2 * (r) * 16)
#define GET_SCRYPT_SZ(r,p) (128 * (r) * (p))
#define GET_STATE_SZ(r) (128 * (r))
#define SCRYPT_CNT GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
#define SCRYPT_CNT4 (SCRYPT_CNT / 4)
#define STATE_CNT GET_STATE_CNT (SCRYPT_R)
#define STATE_CNT4 (STATE_CNT / 4)
// _SZ is true sizes as bytes
#define SCRYPT_SZ GET_SCRYPT_SZ (SCRYPT_R, SCRYPT_P)
#define STATE_SZ GET_STATE_SZ (SCRYPT_R)
// _CNT is size as whatever /X datatype
#define SCRYPT_CNT4 (SCRYPT_SZ / 4)
#define STATE_CNT4 (STATE_SZ / 4)
// this would be uint4, feels more natural than 16
#define SCRYPT_CNT44 ((SCRYPT_SZ / 4) / 4)
#define STATE_CNT44 ((STATE_SZ / 4) / 4)
#define SALSA_SZ 64
#define SALSA_CNT4 (SALSA_SZ / 4)
#define VIDX(bid4,lsz,lid,ySIZE,zSIZE,y,z) (((bid4) * (lsz) * (ySIZE) * (zSIZE)) + ((lid) * (ySIZE) * (zSIZE)) + ((y) * (zSIZE)) + (z))
#if defined IS_CUDA
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
inline __device__ uint4 operator >> (const uint4 a, const u32 b) { return make_uint4 ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); }
inline __device__ uint4 operator + (const uint4 a, const uint4 b) { return make_uint4 ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); }
inline __device__ uint4 operator ^ (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
inline __device__ uint4 operator | (const uint4 a, const uint4 b) { return make_uint4 ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
inline __device__ void operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
#endif
#if defined IS_CUDA || defined IS_HIP
inline __device__ uint4 rotate (const uint4 a, const int n)
DECLSPEC uint4 operator ^ (const uint4 a, const uint4 b)
{
uint4 r;
r.x = hc_rotl32_S (r.x, n);
r.y = hc_rotl32_S (r.y, n);
r.z = hc_rotl32_S (r.z, n);
r.w = hc_rotl32_S (r.w, n);
r.x = a.x ^ b.x;
r.y = a.y ^ b.y;
r.z = a.z ^ b.z;
r.w = a.w ^ b.w;
return r;
}
#endif
#endif

View File

@ -131,7 +131,7 @@ DECLSPEC u32x rotl32 (const u32x a, const int n)
return __funnelshift_l(a, a, n);
#endif
u32x t;
u32x t = 0;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_l(a.s0, a.s0, n);
@ -171,7 +171,7 @@ DECLSPEC u32x rotr32 (const u32x a, const int n)
return __funnelshift_r(a, a, n);
#endif
u32x t;
u32x t = 0;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_r(a.s0, a.s0, n);

View File

@ -148,30 +148,20 @@ using namespace metal;
#define HC_INLINE inline static
#endif
#if defined IS_AMD && defined IS_GPU
#define DECLSPEC HC_INLINE
#elif defined IS_CUDA
#define DECLSPEC __device__
#elif defined IS_HIP
#define DECLSPEC __device__
#else
#define DECLSPEC
#endif
#if defined FIXED_LOCAL_SIZE
#define HC_ATTR_SEQ FIXED_THREAD_COUNT((FIXED_LOCAL_SIZE))
#else
#if defined IS_AMD && defined IS_GPU
#define HC_ATTR_SEQ
#define DECLSPEC HC_INLINE
#elif defined IS_HIP
#define HC_ATTR_SEQ __launch_bounds__((MAX_THREADS_PER_BLOCK), 0)
#define DECLSPEC __device__ HC_INLINE
#elif defined IS_CUDA
#define HC_ATTR_SEQ
#define DECLSPEC
#else
#define HC_ATTR_SEQ
#define DECLSPEC
#endif
#endif
#if defined IS_AMD && defined IS_GPU
#define DECLSPEC HC_INLINE
#elif defined IS_HIP
#define DECLSPEC __device__ HC_INLINE
#else
#define DECLSPEC
#endif
/**

View File

@ -21,7 +21,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -31,9 +31,13 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
u32 out[SCRYPT_CNT4];
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
scrypt_blockmix_in (out, SCRYPT_SZ);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -45,27 +49,15 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -77,29 +69,16 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -108,14 +87,20 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
u32 x[SCRYPT_CNT4];
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 16);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
const u32 r0 = tmps[gid].P[0].x;
const u32 r1 = tmps[gid].P[0].y;
const u32 r2 = tmps[gid].P[0].z;
const u32 r3 = tmps[gid].P[0].w;
scrypt_blockmix_out (x, SCRYPT_SZ);
u32 out[4];
scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 16);
const u32 r0 = out[0];
const u32 r1 = out[1];
const u32 r2 = out[2];
const u32 r3 = out[3];
#define il_pos 0

View File

@ -21,7 +21,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -167,9 +167,13 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethe
if (gid >= GID_CNT) return;
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
u32 out[SCRYPT_CNT4];
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
scrypt_blockmix_in (out, SCRYPT_SZ);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -181,27 +185,15 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -213,29 +205,16 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
@ -244,9 +223,15 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethe
if (gid >= GID_CNT) return;
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
u32 x[SCRYPT_CNT4];
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
scrypt_blockmix_out (x, SCRYPT_SZ);
u32 out[8];
scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);
/**
* keccak
@ -265,10 +250,10 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethe
u32 key[4];
key[0] = tmps[gid].P[1].x;
key[1] = tmps[gid].P[1].y;
key[2] = tmps[gid].P[1].z;
key[3] = tmps[gid].P[1].w;
key[0] = out[4];
key[1] = out[5];
key[2] = out[6];
key[3] = out[7];
u64 st[25];

View File

@ -22,7 +22,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -96,23 +96,18 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
sha256_hmac_ctx_t sha256_hmac_ctx;
u32 s[16] = { 0 };
sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
s[0] = hc_swap32_S (MULTIBIT_S0);
s[1] = hc_swap32_S (MULTIBIT_S1);
u32 x0[4] = { 0 };
u32 x1[4] = { 0 };
u32 x2[4] = { 0 };
u32 x3[4] = { 0 };
u32 out[SCRYPT_CNT4];
x0[0] = MULTIBIT_S0;
x0[1] = MULTIBIT_S1;
scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);
sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
scrypt_blockmix_in (out, SCRYPT_SZ);
scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -124,27 +119,15 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -156,29 +139,16 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -261,28 +231,29 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
sha256_hmac_ctx_t ctx;
u32 x[SCRYPT_CNT4];
sha256_hmac_init_swap (&ctx, w, w_len);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_out (x, SCRYPT_SZ);
scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
u32 out[8];
scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);
// AES256-CBC decrypt with IV from salt buffer (dynamic, alternative 1):
u32 key[8];
key[0] = tmps[gid].P[0].x;
key[1] = tmps[gid].P[0].y;
key[2] = tmps[gid].P[0].z;
key[3] = tmps[gid].P[0].w;
key[4] = tmps[gid].P[1].x;
key[5] = tmps[gid].P[1].y;
key[6] = tmps[gid].P[1].z;
key[7] = tmps[gid].P[1].w;
key[0] = out[0];
key[1] = out[1];
key[2] = out[2];
key[3] = out[3];
key[4] = out[4];
key[5] = out[5];
key[6] = out[6];
key[7] = out[7];
#define KEYLEN 60

View File

@ -22,7 +22,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -171,17 +171,17 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
if (gid >= GID_CNT) return;
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
u32 out[SCRYPT_CNT4];
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
scrypt_blockmix_in (out, SCRYPT_SZ);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
@ -189,24 +189,15 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp
if (gid >= GID_CNT) return;
// SCRYPT part, init V
u32 X[STATE_CNT4];
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
uint4 X[STATE_CNT4];
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
@ -218,29 +209,16 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m24000_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
@ -305,9 +283,15 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
if (gid >= GID_CNT) return;
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
u32 x[SCRYPT_CNT4];
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
scrypt_blockmix_out (x, SCRYPT_SZ);
u32 out[8];
scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);
u32 version = esalt_bufs[DIGESTS_OFFSET_HOST].version;
@ -317,14 +301,14 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
u32 key[8];
key[0] = tmps[gid].P[0].x;
key[1] = tmps[gid].P[0].y;
key[2] = tmps[gid].P[0].z;
key[3] = tmps[gid].P[0].w;
key[4] = tmps[gid].P[1].x;
key[5] = tmps[gid].P[1].y;
key[6] = tmps[gid].P[1].z;
key[7] = tmps[gid].P[1].w;
key[0] = out[0];
key[1] = out[1];
key[2] = out[2];
key[3] = out[3];
key[4] = out[4];
key[5] = out[5];
key[6] = out[6];
key[7] = out[7];
if (version == 0x38) //0x38 is char for '8' which is the crypto type passed in position 3 of hash ( $08$ )
{

View File

@ -19,7 +19,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -46,23 +46,18 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
sha256_hmac_ctx_t sha256_hmac_ctx;
u32 s[16] = { 0 };
sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
s[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]);
s[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]);
u32 x0[4] = { 0 };
u32 x1[4] = { 0 };
u32 x2[4] = { 0 };
u32 x3[4] = { 0 };
u32 out[SCRYPT_CNT4];
x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);
sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
scrypt_blockmix_in (out, SCRYPT_SZ);
scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -74,27 +69,15 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -106,29 +89,16 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -211,28 +181,29 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
sha256_hmac_ctx_t ctx;
u32 x[SCRYPT_CNT4];
sha256_hmac_init_swap (&ctx, w, w_len);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_out (x, SCRYPT_SZ);
scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
u32 out[8];
scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);
// AES256-CBC decrypt
u32 key[8];
key[0] = tmps[gid].P[0].x;
key[1] = tmps[gid].P[0].y;
key[2] = tmps[gid].P[0].z;
key[3] = tmps[gid].P[0].w;
key[4] = tmps[gid].P[1].x;
key[5] = tmps[gid].P[1].y;
key[6] = tmps[gid].P[1].z;
key[7] = tmps[gid].P[1].w;
key[0] = out[0];
key[1] = out[1];
key[2] = out[2];
key[3] = out[3];
key[4] = out[4];
key[5] = out[5];
key[6] = out[6];
key[7] = out[7];
#define KEYLEN 60

View File

@ -23,7 +23,7 @@ typedef struct exodus_tmp
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} exodus_tmp_t;
@ -41,9 +41,13 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod
if (gid >= GID_CNT) return;
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
u32 out[SCRYPT_CNT4];
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
scrypt_blockmix_in (out, SCRYPT_SZ);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -55,27 +59,15 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -87,29 +79,16 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -171,22 +150,28 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod
if (gid >= GID_CNT) return;
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
u32 x[SCRYPT_CNT4];
scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
scrypt_blockmix_out (x, SCRYPT_SZ);
u32 out[8];
scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);
// GCM stuff
u32 ukey[8];
ukey[0] = hc_swap32_S (tmps[gid].P[0].x);
ukey[1] = hc_swap32_S (tmps[gid].P[0].y);
ukey[2] = hc_swap32_S (tmps[gid].P[0].z);
ukey[3] = hc_swap32_S (tmps[gid].P[0].w);
ukey[4] = hc_swap32_S (tmps[gid].P[1].x);
ukey[5] = hc_swap32_S (tmps[gid].P[1].y);
ukey[6] = hc_swap32_S (tmps[gid].P[1].z);
ukey[7] = hc_swap32_S (tmps[gid].P[1].w);
ukey[0] = hc_swap32_S (out[0]);
ukey[1] = hc_swap32_S (out[1]);
ukey[2] = hc_swap32_S (out[2]);
ukey[3] = hc_swap32_S (out[3]);
ukey[4] = hc_swap32_S (out[4]);
ukey[5] = hc_swap32_S (out[5]);
ukey[6] = hc_swap32_S (out[6]);
ukey[7] = hc_swap32_S (out[7]);
u32 key[60] = { 0 };
u32 subKey[4] = { 0 };

View File

@ -19,7 +19,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif
uint4 P[SCRYPT_TMP_ELEM];
u32 P[SCRYPT_TMP_ELEM];
} scrypt_tmp_t;
@ -46,23 +46,18 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
sha256_hmac_ctx_t sha256_hmac_ctx;
u32 s[16] = { 0 };
sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
s[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]);
s[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]);
u32 x0[4] = { 0 };
u32 x1[4] = { 0 };
u32 x2[4] = { 0 };
u32 x3[4] = { 0 };
u32 out[SCRYPT_CNT4];
x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);
sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
scrypt_blockmix_in (out, SCRYPT_SZ);
scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}
KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -74,27 +69,15 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
#ifdef IS_HIP
LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *X = X_s[lid];
#else
uint4 X[STATE_CNT4];
#endif
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -106,29 +89,16 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
if (gid >= GID_CNT) return;
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
u32 X[STATE_CNT4];
u32 T[STATE_CNT4];
uint4 X[STATE_CNT4];
GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
#ifdef IS_HIP
LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
LOCAL_AS uint4 *T = T_s[lid];
#else
uint4 T[STATE_CNT4];
#endif
for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
const u32 P_offset = SALT_REPEAT * STATE_CNT4;
scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}
KERNEL_FQ void HC_ATTR_SEQ m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -211,28 +181,29 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
| ((w[j] << 8) & 0xff00ff00);
}
scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
sha256_hmac_ctx_t ctx;
u32 x[SCRYPT_CNT4];
sha256_hmac_init_swap (&ctx, w, w_len);
for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
scrypt_blockmix_out (x, SCRYPT_SZ);
scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
u32 out[8];
scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);
// AES256-CBC decrypt
u32 key[8];
key[0] = tmps[gid].P[0].x;
key[1] = tmps[gid].P[0].y;
key[2] = tmps[gid].P[0].z;
key[3] = tmps[gid].P[0].w;
key[4] = tmps[gid].P[1].x;
key[5] = tmps[gid].P[1].y;
key[6] = tmps[gid].P[1].z;
key[7] = tmps[gid].P[1].w;
key[0] = out[0];
key[1] = out[1];
key[2] = out[2];
key[3] = out[3];
key[4] = out[4];
key[5] = out[5];
key[6] = out[6];
key[7] = out[7];
#define KEYLEN 60

View File

@ -117,4 +117,7 @@ char *file_to_buffer (const char *filename);
bool check_file_suffix (const char *file, const char *suffix);
bool remove_file_suffix (char *file, const char *suffix);
int suppress_stderr (void);
void restore_stderr (int saved_fd);
#endif // HC_SHARED_H

View File

@ -486,22 +486,13 @@ static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context cont
OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
#ifndef DEBUG
const int fd_stderr = fileno (stderr);
const int stderr_bak = dup (fd_stderr);
#ifdef _WIN
const int tmp = open ("NUL", O_WRONLY);
#else
const int tmp = open ("/dev/null", O_WRONLY);
#endif
dup2 (tmp, fd_stderr);
close (tmp);
int saved_stderr = suppress_stderr ();
#endif
const int CL_rc = ocl->clBuildProgram (program, 1, &device, NULL, NULL, NULL);
#ifndef DEBUG
dup2 (stderr_bak, fd_stderr);
close (stderr_bak);
restore_stderr (saved_stderr);
#endif
if (CL_rc != CL_SUCCESS)
@ -8984,8 +8975,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
nvrtc_options[nvrtc_options_idx++] = "--std=c++14";
}
nvrtc_options[nvrtc_options_idx++] = "--restrict";
nvrtc_options[nvrtc_options_idx++] = "--device-as-default-execution-space";
//nvrtc_options[nvrtc_options_idx++] = "--restrict";
nvrtc_options[nvrtc_options_idx++] = "--gpu-architecture";
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "compute_%d", (device_param->sm_major * 10) + device_param->sm_minor);
@ -9243,6 +9233,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-architecture=%s", device_param->gcnArchName);
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
// untested but it should work
#if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
@ -10598,6 +10589,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D XM2S(x)=#x ");
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D M2S(x)=XM2S(x) ");
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D MAX_THREADS_PER_BLOCK=%d ", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
#if defined (__APPLE__)
if (is_apple_silicon () == true)

View File

@ -195,9 +195,9 @@ typedef struct
} python_interpreter_t;
#if defined (_WIN) || defined (__CYGWIN__) || defined (__APPLE__)
static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_sp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_sp.py";
#else
static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_mp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_mp.py";
#endif
const char *extract_module_name (const char *path)
@ -227,78 +227,6 @@ const char *extract_module_name (const char *path)
return module_name;
}
char *load_source (const char *filename)
{
FILE *fp = fopen (filename, "r");
if (fp == NULL)
{
fprintf (stderr, "%s: %s\n", filename, strerror (errno));
return NULL;
}
fseek (fp, 0, SEEK_END);
const size_t size = ftell (fp);
fseek (fp, 0, SEEK_SET);
char *source = hcmalloc (size + 1);
if (fread (source, 1, size, fp) != size)
{
fprintf (stderr, "%s: %s\n", filename, strerror (errno));
hcfree (source);
return NULL;
}
source[size] = 0;
fclose (fp);
return source;
}
#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif
static int suppress_stderr (void)
{
int null_fd = open (DEVNULL, O_WRONLY);
if (null_fd < 0) return -1;
int saved_fd = dup (fileno (stderr));
if (saved_fd < 0)
{
close (null_fd);
return -1;
}
dup2 (null_fd, fileno (stderr));
close (null_fd);
return saved_fd;
}
static void restore_stderr (int saved_fd)
{
if (saved_fd < 0) return;
dup2 (saved_fd, fileno (stderr));
close (saved_fd);
}
static char *expand_pyenv_libpath (const char *prefix, const int maj, const int min)
{
char *out = NULL;
@ -793,9 +721,14 @@ void *platform_init (user_options_t *user_options)
python_interpreter->source_filename = (user_options->bridge_parameter1 == NULL) ? DEFAULT_SOURCE_FILENAME : user_options->bridge_parameter1;
char *source = load_source (python_interpreter->source_filename);
char *source = file_to_buffer (python_interpreter->source_filename);
if (source == NULL) return NULL;
if (source == NULL)
{
fprintf (stderr, "ERROR: %s: %s\n\n", python_interpreter->source_filename, strerror (errno));
return NULL;
}
PyObject *code = python->Py_CompileStringExFlags (source, python_interpreter->source_filename, Py_file_input, NULL, -1);

View File

@ -192,7 +192,7 @@ typedef struct
} python_interpreter_t;
static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_sp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_sp.py";
const char *extract_module_name (const char *path)
{
@ -221,78 +221,6 @@ const char *extract_module_name (const char *path)
return module_name;
}
char *load_source (const char *filename)
{
FILE *fp = fopen (filename, "r");
if (fp == NULL)
{
fprintf (stderr, "%s: %s\n", filename, strerror (errno));
return NULL;
}
fseek (fp, 0, SEEK_END);
const size_t size = ftell (fp);
fseek (fp, 0, SEEK_SET);
char *source = hcmalloc (size + 1);
if (fread (source, 1, size, fp) != size)
{
fprintf (stderr, "%s: %s\n", filename, strerror (errno));
hcfree (source);
return NULL;
}
source[size] = 0;
fclose (fp);
return source;
}
#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif
static int suppress_stderr (void)
{
int null_fd = open (DEVNULL, O_WRONLY);
if (null_fd < 0) return -1;
int saved_fd = dup (fileno (stderr));
if (saved_fd < 0)
{
close (null_fd);
return -1;
}
dup2 (null_fd, fileno (stderr));
close (null_fd);
return saved_fd;
}
static void restore_stderr (int saved_fd)
{
if (saved_fd < 0) return;
dup2 (saved_fd, fileno (stderr));
close (saved_fd);
}
static char *expand_pyenv_libpath (const char *prefix, const int maj, const int min)
{
char *out = NULL;
@ -843,7 +771,7 @@ bool thread_init (MAYBE_UNUSED void *platform_context, MAYBE_UNUSED hc_device_pa
python->Py_DecRef (path);
python->Py_DecRef (sys);
char *source = load_source (python_interpreter->source_filename);
char *source = file_to_buffer (python_interpreter->source_filename);
if (source == NULL) return NULL;
@ -1154,9 +1082,14 @@ const char *st_update_hash (MAYBE_UNUSED void *platform_context)
python->Py_DecRef (path);
python->Py_DecRef (sys);
char *source = load_source (python_interpreter->source_filename);
char *source = file_to_buffer (python_interpreter->source_filename);
if (source == NULL) return NULL;
if (source == NULL)
{
fprintf (stderr, "ERROR: %s: %s\n\n", python_interpreter->source_filename, strerror (errno));
return NULL;
}
PyObject *code = python->Py_CompileStringExFlags (source, python_interpreter->source_filename, Py_file_input, NULL, -1);
@ -1218,7 +1151,7 @@ const char *st_update_pass (MAYBE_UNUSED void *platform_context)
// this is ugly to load that entire thing just to get that one variable
char *source = load_source (python_interpreter->source_filename);
char *source = file_to_buffer (python_interpreter->source_filename);
if (source == NULL) return NULL;

View File

@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-MD5-96/HMAC-SHA1-96";
static const u64 KERN_TYPE = 25000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE;
| OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$0$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
@ -118,6 +117,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -337,11 +354,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-MD5-96";
static const u64 KERN_TYPE = 25100;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE;
| OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$1$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
@ -114,6 +113,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -318,11 +335,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-SHA1-96";
static const u64 KERN_TYPE = 25200;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE;
| OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$2$45889431$30818f02010330110204371780f3020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f534841040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204073557d50201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$81f14f1930589f26f6755f6b";
@ -114,6 +113,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -329,11 +346,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -115,6 +115,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -332,11 +350,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -115,6 +115,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -340,11 +358,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -149,6 +149,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of SNMPV3_MAX_PW_LENGTH
@ -383,11 +401,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -149,6 +149,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
// Cap kernel_accel at 256 for this SNMPv3 mode: each work item carries a
// large tmps structure, so higher accel values would consume excessive
// device memory. hashcat still auto-tunes downward if memory is short.
u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_accel_max = 256;
return kernel_accel_max;
}
// Cap threads per block at 32 for the same reason: per-thread tmps state
// is large, so fewer threads per block keeps memory pressure manageable.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// due to the large tmps structure
const u32 kernel_threads_max = 32;
return kernel_threads_max;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
// we need to fix iteration count to guarantee the loop count is a multiple of SNMPV3_MAX_PW_LENGTH
@ -379,11 +397,11 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = module_kernel_accel_max;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

View File

@ -54,10 +54,10 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
const u32 scrypt_N = (hashes->salts_buf[0].scrypt_N == 0) ? hashes->st_salts_buf[0].scrypt_N : hashes->salts_buf[0].scrypt_N;
const u32 scrypt_r = (hashes->salts_buf[0].scrypt_r == 0) ? hashes->st_salts_buf[0].scrypt_r : hashes->salts_buf[0].scrypt_r;
const u32 scrypt_p = (hashes->salts_buf[0].scrypt_p == 0) ? hashes->st_salts_buf[0].scrypt_p : hashes->salts_buf[0].scrypt_p;
//const u32 scrypt_p = (hashes->salts_buf[0].scrypt_p == 0) ? hashes->st_salts_buf[0].scrypt_p : hashes->salts_buf[0].scrypt_p;
const u64 size_per_accel = (128ULL * scrypt_r * scrypt_N * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));
const u64 state_per_accel = (128ULL * scrypt_r * scrypt_p * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));
//const u64 state_per_accel = (128ULL * scrypt_r * scrypt_p * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
@ -65,7 +65,7 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
const u32 device_processors = device_param->device_processors;
const u32 device_local_mem_size = device_param->device_local_mem_size;
//const u32 device_local_mem_size = device_param->device_local_mem_size;
const u64 fixed_mem = (512 * 1024 * 1024); // some storage we need for pws[], tmps[], and others
@ -160,21 +160,6 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
break;
}
if (device_param->is_hip == true)
{
// we use some local memory to speed up things, so
// we need to make sure there's enough local memory available
u64 state_per_accel_tmto = state_per_accel >> tmto;
while (state_per_accel_tmto > device_local_mem_size)
{
tmto++;
state_per_accel_tmto = state_per_accel >> tmto;
}
}
}
}
@ -301,7 +286,7 @@ char *scrypt_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconf
scrypt_r,
scrypt_p,
tmto,
tmp_size / 16);
tmp_size / 4);
return jit_build_options;
}

View File

@ -1582,3 +1582,40 @@ bool remove_file_suffix (char *file, const char *suffix)
return true;
}
#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif
// Temporarily silence stderr by redirecting it to the null device
// (DEVNULL is "NUL" on Windows, "/dev/null" elsewhere). Shared helper
// (non-static) so both the Python bridges and OpenCL test code can use it.
// Returns a dup of the original stderr fd to pass to restore_stderr(),
// or -1 if redirection could not be set up (stderr is left untouched).
int suppress_stderr (void)
{
int null_fd = open (DEVNULL, O_WRONLY);
if (null_fd < 0) return -1;
// keep a duplicate of the original stderr so it can be restored later
int saved_fd = dup (fileno (stderr));
if (saved_fd < 0)
{
close (null_fd);
return -1;
}
// point stderr at the null device; null_fd is no longer needed after dup2
dup2 (null_fd, fileno (stderr));
close (null_fd);
return saved_fd;
}
// Undo suppress_stderr(): re-attach the saved descriptor to stderr and
// close the duplicate. Accepts -1 (the suppress_stderr() failure value)
// as a harmless no-op.
void restore_stderr (int saved_fd)
{
if (saved_fd < 0) return;
dup2 (saved_fd, fileno (stderr));
close (saved_fd);
}

View File

@ -34,7 +34,7 @@ if ($cpu_benchmark == 1)
}
else
{
system ("rocm-smi --resetprofile --resetclocks --resetfans");
#system ("rocm-smi --resetprofile --resetclocks --resetfans");
system ("rocm-smi --setfan 100% --setperflevel high");
system ("nvidia-settings -a GPUPowerMizerMode=1 -a GPUFanControlState=1 -a GPUTargetFanSpeed=100");