Removed the shared-memory-based optimization for SCRYPT on HIP, because the shared-memory buffer is incompatible with TMTO, which limited SCRYPT R to a maximum of 8. This change also simplifies the code, allowing removal of large sections of duplicated code. Removed the section in scrypt_module_extra_tuningdb_block() that increased TMTO when there was insufficient shared memory, as this is no longer applicable.
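For context, the TMTO (time-memory trade-off) that the shared-memory path could not support works as sketched below: only every 2^TMTO-th BlockMix state is stored in V, and lookups recompute the missing steps. This is a minimal illustration distilled from scrypt_smix_init()/scrypt_smix_loop() in this diff; state_t and block_mix() are placeholders, not hashcat API.

    /* store pass: keep 1 of every 2^TMTO states */
    for (u32 y = 0; y < (SCRYPT_N >> SCRYPT_TMTO); y++)
    {
      V[y] = X;

      for (u32 i = 0; i < (1u << SCRYPT_TMTO); i++) block_mix (&X);
    }

    /* lookup of state k: rebuild from the nearest stored ancestor */
    const u32 y  = k >> SCRYPT_TMTO;
    const u32 km = k - (y << SCRYPT_TMTO);  // missing steps

    state_t T = V[y];

    for (u32 i = 0; i < km; i++) block_mix (&T);  // T now equals state k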
Refactored inc_hash_scrypt.cl almost completely and improved the macro names in inc_hash_scrypt.h. Adapted all existing SCRYPT-based plugins to the new standard. If you have custom SCRYPT-based plugins, use hash-mode 8900 as a reference.

Fixed some compiler warnings in inc_platform.cl.

Cleaned up the code paths in inc_vendor.h for finding values for HC_ATTR_SEQ and DECLSPEC.

Removed the option --device-as-default-execution-space from nvrtc for hiprtc compatibility. As a result, added __device__ back to DECLSPEC. Removed the option --restrict from the nvrtc compile options, since we actually alias some buffers. Added --gpu-max-threads-per-block to the hiprtc options. Added -D MAX_THREADS_PER_BLOCK to the OpenCL options (currently unused).

Removed all OPTS_TYPE_MP_MULTI_DISABLE entries for SNMPv3-based plugins. These plugins consume large amounts of memory and, for this reason, kernel_accel is now limited to a maximum of 256. This may still be high, but hashcat will automatically tune kernel_accel down if insufficient memory is detected.

Removed the command `rocm-smi --resetprofile --resetclocks --resetfans` from benchmark_deep.pl, since some AMD GPUs become artificially slow for a while after running these commands.

Replaced load_source() with file_to_buffer() from shared.c, which performs exactly the same operations. Moved suppress_stderr() and restore_stderr() to shared.c and reused them in both the Python bridges and opencl_test_instruction(), where the same type of code existed.
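The single scrypt_pbkdf2() helper is replaced in this diff by four variants whose suffix encodes the address space of the password and salt buffers (p = PRIVATE_AS, g = GLOBAL_AS). A short sketch of how a plugin picks one (argument names shortened; signatures are the ones defined further down in this commit):

    scrypt_pbkdf2_pp (w, w_len, s, s_len, out, SCRYPT_SZ);  // private pw, private salt
    scrypt_pbkdf2_pg (w, w_len, s, s_len, out, SCRYPT_SZ);  // private pw, global  salt
    scrypt_pbkdf2_gp (w, w_len, s, s_len, out, SCRYPT_SZ);  // global  pw, private salt
    scrypt_pbkdf2_gg (w, w_len, s, s_len, out, SCRYPT_SZ);  // global  pw, global  salt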
This commit is contained in: parent f399c97db0 / commit b7c8fcf27c

OpenCL/inc_hash_scrypt.cl
@@ -9,441 +9,288 @@
#include "inc_common.h"
|
||||
#include "inc_hash_scrypt.h"
|
||||
|
||||
DECLSPEC void salsa_r_l (LOCAL_AS u32 *TI)
|
||||
DECLSPEC void salsa_r (PRIVATE_AS u32 *TI)
|
||||
{
|
||||
u32 x[16];
|
||||
u32 TT[STATE_CNT4/2]; // we actually nned 16 here for SALSA, but we reuse that buffer for blockmix
|
||||
|
||||
for (int j = 0; j < 16; j++) x[j] = TI[STATE_CNT - 16 + j];
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TT[j] = TI[STATE_CNT4 - 16 + j];
|
||||
|
||||
for (int i = 0; i < STATE_CNT; i += 16)
|
||||
for (int i = 0; i < STATE_CNT4; i += SALSA_CNT4)
|
||||
{
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
x[j] ^= TI[i + j];
|
||||
}
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TT[j] ^= TI[i + j];
|
||||
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
TI[i + j] = x[j];
|
||||
}
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];
|
||||
|
||||
for (int r = 0; r < 4; r++)
|
||||
{
|
||||
u32 t0, t1, t2, t3;
|
||||
|
||||
t0 = x[ 0] + x[12];
|
||||
t1 = x[ 1] + x[13];
|
||||
t2 = x[ 2] + x[14];
|
||||
t3 = x[ 3] + x[15];
|
||||
x[ 4] ^= hc_rotl32_S (t0, 7);
|
||||
x[ 5] ^= hc_rotl32_S (t1, 7);
|
||||
x[ 6] ^= hc_rotl32_S (t2, 7);
|
||||
x[ 7] ^= hc_rotl32_S (t3, 7);
|
||||
t0 = TT[ 0] + TT[12];
|
||||
t1 = TT[ 1] + TT[13];
|
||||
t2 = TT[ 2] + TT[14];
|
||||
t3 = TT[ 3] + TT[15];
|
||||
TT[ 4] ^= hc_rotl32_S (t0, 7);
|
||||
TT[ 5] ^= hc_rotl32_S (t1, 7);
|
||||
TT[ 6] ^= hc_rotl32_S (t2, 7);
|
||||
TT[ 7] ^= hc_rotl32_S (t3, 7);
|
||||
|
||||
t0 = x[ 4] + x[ 0];
|
||||
t1 = x[ 5] + x[ 1];
|
||||
t2 = x[ 6] + x[ 2];
|
||||
t3 = x[ 7] + x[ 3];
|
||||
x[ 8] ^= hc_rotl32_S (t0, 9);
|
||||
x[ 9] ^= hc_rotl32_S (t1, 9);
|
||||
x[10] ^= hc_rotl32_S (t2, 9);
|
||||
x[11] ^= hc_rotl32_S (t3, 9);
|
||||
t0 = TT[ 4] + TT[ 0];
|
||||
t1 = TT[ 5] + TT[ 1];
|
||||
t2 = TT[ 6] + TT[ 2];
|
||||
t3 = TT[ 7] + TT[ 3];
|
||||
TT[ 8] ^= hc_rotl32_S (t0, 9);
|
||||
TT[ 9] ^= hc_rotl32_S (t1, 9);
|
||||
TT[10] ^= hc_rotl32_S (t2, 9);
|
||||
TT[11] ^= hc_rotl32_S (t3, 9);
|
||||
|
||||
t0 = x[ 8] + x[ 4];
|
||||
t1 = x[ 9] + x[ 5];
|
||||
t2 = x[10] + x[ 6];
|
||||
t3 = x[11] + x[ 7];
|
||||
x[12] ^= hc_rotl32_S (t0, 13);
|
||||
x[13] ^= hc_rotl32_S (t1, 13);
|
||||
x[14] ^= hc_rotl32_S (t2, 13);
|
||||
x[15] ^= hc_rotl32_S (t3, 13);
|
||||
t0 = TT[ 8] + TT[ 4];
|
||||
t1 = TT[ 9] + TT[ 5];
|
||||
t2 = TT[10] + TT[ 6];
|
||||
t3 = TT[11] + TT[ 7];
|
||||
TT[12] ^= hc_rotl32_S (t0, 13);
|
||||
TT[13] ^= hc_rotl32_S (t1, 13);
|
||||
TT[14] ^= hc_rotl32_S (t2, 13);
|
||||
TT[15] ^= hc_rotl32_S (t3, 13);
|
||||
|
||||
t0 = x[12] + x[ 8];
|
||||
t1 = x[13] + x[ 9];
|
||||
t2 = x[14] + x[10];
|
||||
t3 = x[15] + x[11];
|
||||
x[ 0] ^= hc_rotl32_S (t0, 18);
|
||||
x[ 1] ^= hc_rotl32_S (t1, 18);
|
||||
x[ 2] ^= hc_rotl32_S (t2, 18);
|
||||
x[ 3] ^= hc_rotl32_S (t3, 18);
|
||||
t0 = TT[12] + TT[ 8];
|
||||
t1 = TT[13] + TT[ 9];
|
||||
t2 = TT[14] + TT[10];
|
||||
t3 = TT[15] + TT[11];
|
||||
TT[ 0] ^= hc_rotl32_S (t0, 18);
|
||||
TT[ 1] ^= hc_rotl32_S (t1, 18);
|
||||
TT[ 2] ^= hc_rotl32_S (t2, 18);
|
||||
TT[ 3] ^= hc_rotl32_S (t3, 18);
|
||||
|
||||
t0 = x[ 4]; x[ 4] = x[ 7]; x[ 7] = x[ 6]; x[ 6] = x[ 5]; x[ 5] = t0;
|
||||
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
|
||||
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
|
||||
t0 = x[12]; x[12] = x[13]; x[13] = x[14]; x[14] = x[15]; x[15] = t0;
|
||||
t0 = TT[ 4]; TT[ 4] = TT[ 7]; TT[ 7] = TT[ 6]; TT[ 6] = TT[ 5]; TT[ 5] = t0;
|
||||
t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
|
||||
t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
|
||||
t0 = TT[12]; TT[12] = TT[13]; TT[13] = TT[14]; TT[14] = TT[15]; TT[15] = t0;
|
||||
|
||||
t0 = x[ 0] + x[ 4];
|
||||
t1 = x[ 1] + x[ 5];
|
||||
t2 = x[ 2] + x[ 6];
|
||||
t3 = x[ 3] + x[ 7];
|
||||
x[12] ^= hc_rotl32_S (t0, 7);
|
||||
x[13] ^= hc_rotl32_S (t1, 7);
|
||||
x[14] ^= hc_rotl32_S (t2, 7);
|
||||
x[15] ^= hc_rotl32_S (t3, 7);
|
||||
t0 = TT[ 0] + TT[ 4];
|
||||
t1 = TT[ 1] + TT[ 5];
|
||||
t2 = TT[ 2] + TT[ 6];
|
||||
t3 = TT[ 3] + TT[ 7];
|
||||
TT[12] ^= hc_rotl32_S (t0, 7);
|
||||
TT[13] ^= hc_rotl32_S (t1, 7);
|
||||
TT[14] ^= hc_rotl32_S (t2, 7);
|
||||
TT[15] ^= hc_rotl32_S (t3, 7);
|
||||
|
||||
t0 = x[12] + x[ 0];
|
||||
t1 = x[13] + x[ 1];
|
||||
t2 = x[14] + x[ 2];
|
||||
t3 = x[15] + x[ 3];
|
||||
x[ 8] ^= hc_rotl32_S (t0, 9);
|
||||
x[ 9] ^= hc_rotl32_S (t1, 9);
|
||||
x[10] ^= hc_rotl32_S (t2, 9);
|
||||
x[11] ^= hc_rotl32_S (t3, 9);
|
||||
t0 = TT[12] + TT[ 0];
|
||||
t1 = TT[13] + TT[ 1];
|
||||
t2 = TT[14] + TT[ 2];
|
||||
t3 = TT[15] + TT[ 3];
|
||||
TT[ 8] ^= hc_rotl32_S (t0, 9);
|
||||
TT[ 9] ^= hc_rotl32_S (t1, 9);
|
||||
TT[10] ^= hc_rotl32_S (t2, 9);
|
||||
TT[11] ^= hc_rotl32_S (t3, 9);
|
||||
|
||||
t0 = x[ 8] + x[12];
|
||||
t1 = x[ 9] + x[13];
|
||||
t2 = x[10] + x[14];
|
||||
t3 = x[11] + x[15];
|
||||
x[ 4] ^= hc_rotl32_S (t0, 13);
|
||||
x[ 5] ^= hc_rotl32_S (t1, 13);
|
||||
x[ 6] ^= hc_rotl32_S (t2, 13);
|
||||
x[ 7] ^= hc_rotl32_S (t3, 13);
|
||||
t0 = TT[ 8] + TT[12];
|
||||
t1 = TT[ 9] + TT[13];
|
||||
t2 = TT[10] + TT[14];
|
||||
t3 = TT[11] + TT[15];
|
||||
TT[ 4] ^= hc_rotl32_S (t0, 13);
|
||||
TT[ 5] ^= hc_rotl32_S (t1, 13);
|
||||
TT[ 6] ^= hc_rotl32_S (t2, 13);
|
||||
TT[ 7] ^= hc_rotl32_S (t3, 13);
|
||||
|
||||
t0 = x[ 4] + x[ 8];
|
||||
t1 = x[ 5] + x[ 9];
|
||||
t2 = x[ 6] + x[10];
|
||||
t3 = x[ 7] + x[11];
|
||||
x[ 0] ^= hc_rotl32_S (t0, 18);
|
||||
x[ 1] ^= hc_rotl32_S (t1, 18);
|
||||
x[ 2] ^= hc_rotl32_S (t2, 18);
|
||||
x[ 3] ^= hc_rotl32_S (t3, 18);
|
||||
t0 = TT[ 4] + TT[ 8];
|
||||
t1 = TT[ 5] + TT[ 9];
|
||||
t2 = TT[ 6] + TT[10];
|
||||
t3 = TT[ 7] + TT[11];
|
||||
TT[ 0] ^= hc_rotl32_S (t0, 18);
|
||||
TT[ 1] ^= hc_rotl32_S (t1, 18);
|
||||
TT[ 2] ^= hc_rotl32_S (t2, 18);
|
||||
TT[ 3] ^= hc_rotl32_S (t3, 18);
|
||||
|
||||
t0 = x[ 4]; x[ 4] = x[ 5]; x[ 5] = x[ 6]; x[ 6] = x[ 7]; x[ 7] = t0;
|
||||
t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
|
||||
t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
|
||||
t0 = x[15]; x[15] = x[14]; x[14] = x[13]; x[13] = x[12]; x[12] = t0;
|
||||
t0 = TT[ 4]; TT[ 4] = TT[ 5]; TT[ 5] = TT[ 6]; TT[ 6] = TT[ 7]; TT[ 7] = t0;
|
||||
t0 = TT[ 8]; TT[ 8] = TT[10]; TT[10] = t0;
|
||||
t0 = TT[ 9]; TT[ 9] = TT[11]; TT[11] = t0;
|
||||
t0 = TT[15]; TT[15] = TT[14]; TT[14] = TT[13]; TT[13] = TT[12]; TT[12] = t0;
|
||||
}
|
||||
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
x[j] += TI[i + j];
|
||||
}
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TT[j] += TI[i + j];
|
||||
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
TI[i + j] = x[j];
|
||||
}
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TI[i + j] = TT[j];
|
||||
}
|
||||
|
||||
#if SCRYPT_R > 1
|
||||
|
||||
u32 TT[STATE_CNT / 2];
|
||||
|
||||
for (int dst_off = 0, src_off = 16; src_off < STATE_CNT; dst_off += 16, src_off += 32)
|
||||
for (int dst_off = 0, src_off = SALSA_CNT4; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
|
||||
{
|
||||
for (int j = 0; j < 16; j++) TT[dst_off + j] = TI[src_off + j];
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TT[dst_off + j] = TI[src_off + j];
|
||||
}
|
||||
|
||||
for (int dst_off = 16, src_off = 32; src_off < STATE_CNT; dst_off += 16, src_off += 32)
|
||||
for (int dst_off = SALSA_CNT4, src_off = SALSA_CNT4 * 2; src_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4 * 2)
|
||||
{
|
||||
for (int j = 0; j < 16; j++) TI[dst_off + j] = TI[src_off + j];
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TI[src_off + j];
|
||||
}
|
||||
|
||||
for (int dst_off = STATE_CNT / 2, src_off = 0; dst_off < STATE_CNT; dst_off += 16, src_off += 16)
|
||||
for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += SALSA_CNT4, src_off += SALSA_CNT4)
|
||||
{
|
||||
for (int j = 0; j < 16; j++) TI[dst_off + j] = TT[src_off + j];
|
||||
for (int j = 0; j < SALSA_CNT4; j++) TI[dst_off + j] = TT[src_off + j];
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
-DECLSPEC void salsa_r_p (PRIVATE_AS u32 *TI)
-{
-  u32 x[16];
-
-  for (int j = 0; j < 16; j++) x[j] = TI[STATE_CNT - 16 + j];
-
-  for (int i = 0; i < STATE_CNT; i += 16)
-  {
-    for (int j = 0; j < 16; j++)
-    {
-      x[j] ^= TI[i + j];
-    }
-
-    for (int j = 0; j < 16; j++)
-    {
-      TI[i + j] = x[j];
-    }
-
-    for (int r = 0; r < 4; r++)
-    {
-      u32 t0, t1, t2, t3;
-
-      t0 = x[ 0] + x[12];
-      t1 = x[ 1] + x[13];
-      t2 = x[ 2] + x[14];
-      t3 = x[ 3] + x[15];
-      x[ 4] ^= hc_rotl32_S (t0, 7);
-      x[ 5] ^= hc_rotl32_S (t1, 7);
-      x[ 6] ^= hc_rotl32_S (t2, 7);
-      x[ 7] ^= hc_rotl32_S (t3, 7);
-
-      t0 = x[ 4] + x[ 0];
-      t1 = x[ 5] + x[ 1];
-      t2 = x[ 6] + x[ 2];
-      t3 = x[ 7] + x[ 3];
-      x[ 8] ^= hc_rotl32_S (t0, 9);
-      x[ 9] ^= hc_rotl32_S (t1, 9);
-      x[10] ^= hc_rotl32_S (t2, 9);
-      x[11] ^= hc_rotl32_S (t3, 9);
-
-      t0 = x[ 8] + x[ 4];
-      t1 = x[ 9] + x[ 5];
-      t2 = x[10] + x[ 6];
-      t3 = x[11] + x[ 7];
-      x[12] ^= hc_rotl32_S (t0, 13);
-      x[13] ^= hc_rotl32_S (t1, 13);
-      x[14] ^= hc_rotl32_S (t2, 13);
-      x[15] ^= hc_rotl32_S (t3, 13);
-
-      t0 = x[12] + x[ 8];
-      t1 = x[13] + x[ 9];
-      t2 = x[14] + x[10];
-      t3 = x[15] + x[11];
-      x[ 0] ^= hc_rotl32_S (t0, 18);
-      x[ 1] ^= hc_rotl32_S (t1, 18);
-      x[ 2] ^= hc_rotl32_S (t2, 18);
-      x[ 3] ^= hc_rotl32_S (t3, 18);
-
-      t0 = x[ 4]; x[ 4] = x[ 7]; x[ 7] = x[ 6]; x[ 6] = x[ 5]; x[ 5] = t0;
-      t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
-      t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
-      t0 = x[12]; x[12] = x[13]; x[13] = x[14]; x[14] = x[15]; x[15] = t0;
-
-      t0 = x[ 0] + x[ 4];
-      t1 = x[ 1] + x[ 5];
-      t2 = x[ 2] + x[ 6];
-      t3 = x[ 3] + x[ 7];
-      x[12] ^= hc_rotl32_S (t0, 7);
-      x[13] ^= hc_rotl32_S (t1, 7);
-      x[14] ^= hc_rotl32_S (t2, 7);
-      x[15] ^= hc_rotl32_S (t3, 7);
-
-      t0 = x[12] + x[ 0];
-      t1 = x[13] + x[ 1];
-      t2 = x[14] + x[ 2];
-      t3 = x[15] + x[ 3];
-      x[ 8] ^= hc_rotl32_S (t0, 9);
-      x[ 9] ^= hc_rotl32_S (t1, 9);
-      x[10] ^= hc_rotl32_S (t2, 9);
-      x[11] ^= hc_rotl32_S (t3, 9);
-
-      t0 = x[ 8] + x[12];
-      t1 = x[ 9] + x[13];
-      t2 = x[10] + x[14];
-      t3 = x[11] + x[15];
-      x[ 4] ^= hc_rotl32_S (t0, 13);
-      x[ 5] ^= hc_rotl32_S (t1, 13);
-      x[ 6] ^= hc_rotl32_S (t2, 13);
-      x[ 7] ^= hc_rotl32_S (t3, 13);
-
-      t0 = x[ 4] + x[ 8];
-      t1 = x[ 5] + x[ 9];
-      t2 = x[ 6] + x[10];
-      t3 = x[ 7] + x[11];
-      x[ 0] ^= hc_rotl32_S (t0, 18);
-      x[ 1] ^= hc_rotl32_S (t1, 18);
-      x[ 2] ^= hc_rotl32_S (t2, 18);
-      x[ 3] ^= hc_rotl32_S (t3, 18);
-
-      t0 = x[ 4]; x[ 4] = x[ 5]; x[ 5] = x[ 6]; x[ 6] = x[ 7]; x[ 7] = t0;
-      t0 = x[ 8]; x[ 8] = x[10]; x[10] = t0;
-      t0 = x[ 9]; x[ 9] = x[11]; x[11] = t0;
-      t0 = x[15]; x[15] = x[14]; x[14] = x[13]; x[13] = x[12]; x[12] = t0;
-    }
-
-    for (int j = 0; j < 16; j++)
-    {
-      x[j] += TI[i + j];
-    }
-
-    for (int j = 0; j < 16; j++)
-    {
-      TI[i + j] = x[j];
-    }
-  }
-
-  #if SCRYPT_R > 1
-
-  u32 TT[STATE_CNT / 2];
-
-  for (int dst_off = 0, src_off = 16; src_off < STATE_CNT; dst_off += 16, src_off += 32)
-  {
-    for (int j = 0; j < 16; j++) TT[dst_off + j] = TI[src_off + j];
-  }
-
-  for (int dst_off = 16, src_off = 32; src_off < STATE_CNT; dst_off += 16, src_off += 32)
-  {
-    for (int j = 0; j < 16; j++) TI[dst_off + j] = TI[src_off + j];
-  }
-
-  for (int dst_off = STATE_CNT / 2, src_off = 0; dst_off < STATE_CNT; dst_off += 16, src_off += 16)
-  {
-    for (int j = 0; j < 16; j++) TI[dst_off + j] = TT[src_off + j];
-  }
-
-  #endif
-}
-
-#ifdef IS_HIP
-DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
-#else
-DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
-#endif
+DECLSPEC void scrypt_smix_init (PRIVATE_AS u32 *X, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
 {
   const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
-  const u32 zSIZE = STATE_CNT4;
+  const u32 zSIZE = STATE_CNT44;
 
   const u32 xd4 = bid / 4;
   const u32 xm4 = bid & 3;
 
+  PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
+
   GLOBAL_AS uint4 *V;
 
   switch (xm4)
   {
-    case 0: V = V0; break;
-    case 1: V = V1; break;
-    case 2: V = V2; break;
-    case 3: V = V3; break;
+    case 0: V = (GLOBAL_AS uint4 *) V0; break;
+    case 1: V = (GLOBAL_AS uint4 *) V1; break;
+    case 2: V = (GLOBAL_AS uint4 *) V2; break;
+    case 3: V = (GLOBAL_AS uint4 *) V3; break;
   }
 
   for (u32 y = 0; y < ySIZE; y++)
   {
-    for (u32 z = 0; z < zSIZE; z++) V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)] = X[z];
+    for (u32 z = 0; z < zSIZE; z++) V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)] = X4[z];
 
-    #ifdef IS_HIP
-    for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r_l ((LOCAL_AS u32 *) X);
-    #else
-    for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r_p ((PRIVATE_AS u32 *) X);
-    #endif
+    for (u32 i = 0; i < (1 << SCRYPT_TMTO); i++) salsa_r (X);
   }
 }
 
-#ifdef IS_HIP
-DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
-#else
-DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
-#endif
+DECLSPEC void scrypt_smix_loop (PRIVATE_AS u32 *X, PRIVATE_AS u32 *T, GLOBAL_AS void *V0, GLOBAL_AS void *V1, GLOBAL_AS void *V2, GLOBAL_AS void *V3, const u32 gid, const u32 lid, const u32 lsz, const u32 bid)
 {
   const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
-  const u32 zSIZE = STATE_CNT4;
+  const u32 zSIZE = STATE_CNT44;
 
   const u32 xd4 = bid / 4;
   const u32 xm4 = bid & 3;
 
+  PRIVATE_AS uint4 *X4 = (PRIVATE_AS uint4 *) X;
+  PRIVATE_AS uint4 *T4 = (PRIVATE_AS uint4 *) T;
+
   GLOBAL_AS uint4 *V;
 
   switch (xm4)
   {
-    case 0: V = V0; break;
-    case 1: V = V1; break;
-    case 2: V = V2; break;
-    case 3: V = V3; break;
+    case 0: V = (GLOBAL_AS uint4 *) V0; break;
+    case 1: V = (GLOBAL_AS uint4 *) V1; break;
+    case 2: V = (GLOBAL_AS uint4 *) V2; break;
+    case 3: V = (GLOBAL_AS uint4 *) V3; break;
  }
 
-  // note: max 2048 iterations = forced -u 2048
+  // note: max 1024 iterations = forced -u 2048
 
   const u32 N_max = (SCRYPT_N < 2048) ? SCRYPT_N : 2048;
 
   for (u32 N_pos = 0; N_pos < N_max; N_pos++)
   {
-    const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
+    const u32 k = X4[zSIZE - 4].x & (SCRYPT_N - 1);
 
     const u32 y = k >> SCRYPT_TMTO;
 
     const u32 km = k - (y << SCRYPT_TMTO);
 
-    for (u32 z = 0; z < zSIZE; z++) T[z] = V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)];
+    for (u32 z = 0; z < zSIZE; z++) T4[z] = V[VIDX(xd4, lsz, lid, ySIZE, zSIZE, y, z)];
 
-    #ifdef IS_HIP
-    for (u32 i = 0; i < km; i++) salsa_r_l ((LOCAL_AS u32 *) T);
-    #else
-    for (u32 i = 0; i < km; i++) salsa_r_p ((PRIVATE_AS u32 *) T);
-    #endif
+    for (u32 i = 0; i < km; i++) salsa_r (T);
 
-    for (u32 z = 0; z < zSIZE; z++) X[z] ^= T[z];
+    for (u32 z = 0; z < zSIZE; z++) X4[z] = X4[z] ^ T4[z];
 
-    salsa_r_p ((PRIVATE_AS u32 *) X);
+    salsa_r (X);
   }
 }
 
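The N_max clamp above caps one invocation of the loop kernel at 2048 smix iterations, so the host splits the full workload across several launches (the "forced -u 2048" in the comment refers to hashcat's kernel-loops mechanism). A back-of-the-envelope illustration, as plain C and not hashcat host code:

    /* assumption: hypothetical launch wrapper, N = 16384 -> 8 launches */
    for (unsigned n_left = 16384; n_left > 0; )
    {
      const unsigned chunk = (n_left < 2048) ? n_left : 2048;

      launch_loop_kernel ();

      n_left -= chunk;
    }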
-DECLSPEC void scrypt_blockmix_in (GLOBAL_AS uint4 *out_buf, const int out_len)
+DECLSPEC void scrypt_blockmix_in (PRIVATE_AS u32 *out_buf, const int out_len)
 {
-  for (int i = 0, j = 0; i < out_len; i += 64, j += 4)
+  for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
   {
-    uint4 T[4];
+    u32 X[SALSA_CNT4];
 
-    T[0] = out_buf[j + 0];
-    T[1] = out_buf[j + 1];
-    T[2] = out_buf[j + 2];
-    T[3] = out_buf[j + 3];
+    X[ 0] = out_buf[j + 0];
+    X[ 1] = out_buf[j + 5];
+    X[ 2] = out_buf[j + 10];
+    X[ 3] = out_buf[j + 15];
+    X[ 4] = out_buf[j + 4];
+    X[ 5] = out_buf[j + 9];
+    X[ 6] = out_buf[j + 14];
+    X[ 7] = out_buf[j + 3];
+    X[ 8] = out_buf[j + 8];
+    X[ 9] = out_buf[j + 13];
+    X[10] = out_buf[j + 2];
+    X[11] = out_buf[j + 7];
+    X[12] = out_buf[j + 12];
+    X[13] = out_buf[j + 1];
+    X[14] = out_buf[j + 6];
+    X[15] = out_buf[j + 11];
 
-    uint4 X[4];
-
-    #if defined IS_CUDA || defined IS_HIP
-    X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
-    X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
-    X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
-    X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
-    #elif defined IS_METAL
-    X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
-    X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
-    X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
-    X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
-    #else
-    X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
-    X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
-    X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w);
-    X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w);
-    #endif
-
-    out_buf[j + 0] = X[0];
-    out_buf[j + 1] = X[1];
-    out_buf[j + 2] = X[2];
-    out_buf[j + 3] = X[3];
+    out_buf[j + 0] = X[ 0];
+    out_buf[j + 1] = X[ 1];
+    out_buf[j + 2] = X[ 2];
+    out_buf[j + 3] = X[ 3];
+    out_buf[j + 4] = X[ 4];
+    out_buf[j + 5] = X[ 5];
+    out_buf[j + 6] = X[ 6];
+    out_buf[j + 7] = X[ 7];
+    out_buf[j + 8] = X[ 8];
+    out_buf[j + 9] = X[ 9];
+    out_buf[j + 10] = X[10];
+    out_buf[j + 11] = X[11];
+    out_buf[j + 12] = X[12];
+    out_buf[j + 13] = X[13];
+    out_buf[j + 14] = X[14];
+    out_buf[j + 15] = X[15];
   }
 }
 
-DECLSPEC void scrypt_blockmix_out (GLOBAL_AS uint4 *out_buf, const int out_len)
+DECLSPEC void scrypt_blockmix_out (PRIVATE_AS u32 *out_buf, const int out_len)
 {
-  for (int i = 0, j = 0; i < out_len; i += 64, j += 4)
+  for (int i = 0, j = 0; i < out_len; i += SALSA_SZ, j += SALSA_CNT4)
   {
-    uint4 X[4];
+    u32 T[SALSA_CNT4];
 
-    X[0] = out_buf[j + 0];
-    X[1] = out_buf[j + 1];
-    X[2] = out_buf[j + 2];
-    X[3] = out_buf[j + 3];
+    T[ 0] = out_buf[j + 0];
+    T[ 1] = out_buf[j + 13];
+    T[ 2] = out_buf[j + 10];
+    T[ 3] = out_buf[j + 7];
+    T[ 4] = out_buf[j + 4];
+    T[ 5] = out_buf[j + 1];
+    T[ 6] = out_buf[j + 14];
+    T[ 7] = out_buf[j + 11];
+    T[ 8] = out_buf[j + 8];
+    T[ 9] = out_buf[j + 5];
+    T[10] = out_buf[j + 2];
+    T[11] = out_buf[j + 15];
+    T[12] = out_buf[j + 12];
+    T[13] = out_buf[j + 9];
+    T[14] = out_buf[j + 6];
+    T[15] = out_buf[j + 3];
 
-    uint4 T[4];
-
-    #if defined IS_CUDA || defined IS_HIP
-    T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
-    T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
-    T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
-    T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
-    #elif defined IS_METAL
-    T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
-    T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
-    T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
-    T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
-    #else
-    T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
-    T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);
-    T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w);
-    T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w);
-    #endif
-
-    out_buf[j + 0] = T[0];
-    out_buf[j + 1] = T[1];
-    out_buf[j + 2] = T[2];
-    out_buf[j + 3] = T[3];
+    out_buf[j + 0] = T[ 0];
+    out_buf[j + 1] = T[ 1];
+    out_buf[j + 2] = T[ 2];
+    out_buf[j + 3] = T[ 3];
+    out_buf[j + 4] = T[ 4];
+    out_buf[j + 5] = T[ 5];
+    out_buf[j + 6] = T[ 6];
+    out_buf[j + 7] = T[ 7];
+    out_buf[j + 8] = T[ 8];
+    out_buf[j + 9] = T[ 9];
+    out_buf[j + 10] = T[10];
+    out_buf[j + 11] = T[11];
+    out_buf[j + 12] = T[12];
+    out_buf[j + 13] = T[13];
+    out_buf[j + 14] = T[14];
+    out_buf[j + 15] = T[15];
  }
 }
 
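The u32 rewrite of the two blockmix shuffles above spells out as explicit indices the same diagonal permutation that the old code expressed through uint4 component swizzles, and scrypt_blockmix_out applies the inverse of scrypt_blockmix_in. A small standalone check (my illustration, plain C; the index tables are copied from the two functions above):

    #include <stdio.h>

    int main (void)
    {
      const int in_map[16]  = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 };
      const int out_map[16] = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };

      for (int k = 0; k < 16; k++)
      {
        if (out_map[in_map[k]] != k) { printf ("not inverse at %d\n", k); return 1; }
      }

      printf ("in/out maps are inverses\n");

      return 0;
    }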
-DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx, GLOBAL_AS uint4 *out_buf, const int out_len)
+DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx, PRIVATE_AS u32 *out_buf, const int out_len)
 {
-  for (int i = 0, j = 1, k = 0; i < out_len; i += 32, j += 1, k += 2)
+  for (int i = 0, j = 1, k = 0; i < out_len; i += 32, j += 1, k += 8)
   {
     sha256_hmac_ctx_t sha256_hmac_ctx2 = *sha256_hmac_ctx;
 
@@ -473,34 +320,56 @@ DECLSPEC void scrypt_pbkdf2_body (PRIVATE_AS sha256_hmac_ctx_t *sha256_hmac_ctx,
 
     sha256_hmac_final (&sha256_hmac_ctx2);
 
-    u32 digest[8];
+    // this will not work if user specifies output length not a multiple of 4
+    // probably never happens...
+    // let's hope the compiler will auto optimize this since out_len is very likely
+    // a constant at caller level
 
-    digest[0] = hc_swap32_S (sha256_hmac_ctx2.opad.h[0]);
-    digest[1] = hc_swap32_S (sha256_hmac_ctx2.opad.h[1]);
-    digest[2] = hc_swap32_S (sha256_hmac_ctx2.opad.h[2]);
-    digest[3] = hc_swap32_S (sha256_hmac_ctx2.opad.h[3]);
-    digest[4] = hc_swap32_S (sha256_hmac_ctx2.opad.h[4]);
-    digest[5] = hc_swap32_S (sha256_hmac_ctx2.opad.h[5]);
-    digest[6] = hc_swap32_S (sha256_hmac_ctx2.opad.h[6]);
-    digest[7] = hc_swap32_S (sha256_hmac_ctx2.opad.h[7]);
-
-    #if defined IS_CUDA || defined IS_HIP
-    const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
-    const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
-    #elif defined IS_METAL
-    const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
-    const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
-    #else
-    const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
-    const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
-    #endif
-
-    out_buf[k + 0] = tmp0;
-    out_buf[k + 1] = tmp1;
+    if (out_len >= (i + 4)) out_buf[k + 0] = hc_swap32_S (sha256_hmac_ctx2.opad.h[0]);
+    if (out_len >= (i + 8)) out_buf[k + 1] = hc_swap32_S (sha256_hmac_ctx2.opad.h[1]);
+    if (out_len >= (i + 12)) out_buf[k + 2] = hc_swap32_S (sha256_hmac_ctx2.opad.h[2]);
+    if (out_len >= (i + 16)) out_buf[k + 3] = hc_swap32_S (sha256_hmac_ctx2.opad.h[3]);
+    if (out_len >= (i + 20)) out_buf[k + 4] = hc_swap32_S (sha256_hmac_ctx2.opad.h[4]);
+    if (out_len >= (i + 24)) out_buf[k + 5] = hc_swap32_S (sha256_hmac_ctx2.opad.h[5]);
+    if (out_len >= (i + 28)) out_buf[k + 6] = hc_swap32_S (sha256_hmac_ctx2.opad.h[6]);
+    if (out_len >= (i + 32)) out_buf[k + 7] = hc_swap32_S (sha256_hmac_ctx2.opad.h[7]);
   }
 }
 
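A worked example of the new per-word guards (my illustration, not from the commit): for out_len = 16 the first SHA-256 block (i = 0) passes the checks 16 >= 4/8/12/16 and fails 16 >= 20 onward, so exactly out_buf[0..3] is written and h[4..7] is discarded. As the comment in the diff notes, this assumes out_len is a multiple of 4.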
-DECLSPEC void scrypt_pbkdf2 (GLOBAL_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, GLOBAL_AS uint4 *out_buf, const int out_len)
+DECLSPEC void scrypt_pbkdf2_pp (PRIVATE_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
+{
+  sha256_hmac_ctx_t sha256_hmac_ctx;
+
+  sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);
+
+  sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);
+
+  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
+}
+
+DECLSPEC void scrypt_pbkdf2_pg (PRIVATE_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
+{
+  sha256_hmac_ctx_t sha256_hmac_ctx;
+
+  sha256_hmac_init_swap (&sha256_hmac_ctx, pw_buf, pw_len);
+
+  sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_buf, salt_len);
+
+  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
+}
+
+DECLSPEC void scrypt_pbkdf2_gp (GLOBAL_AS const u32 *pw_buf, const int pw_len, PRIVATE_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
+{
+  sha256_hmac_ctx_t sha256_hmac_ctx;
+
+  sha256_hmac_init_global_swap (&sha256_hmac_ctx, pw_buf, pw_len);
+
+  sha256_hmac_update_swap (&sha256_hmac_ctx, salt_buf, salt_len);
+
+  scrypt_pbkdf2_body (&sha256_hmac_ctx, out_buf, out_len);
+}
+
+DECLSPEC void scrypt_pbkdf2_gg (GLOBAL_AS const u32 *pw_buf, const int pw_len, GLOBAL_AS const u32 *salt_buf, const int salt_len, PRIVATE_AS u32 *out_buf, const int out_len)
+{
+  sha256_hmac_ctx_t sha256_hmac_ctx;
+
OpenCL/inc_hash_scrypt.h
@@ -6,39 +6,40 @@
 #ifndef INC_HASH_SCRYPT_H
 #define INC_HASH_SCRYPT_H
 
-#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p))
-#define GET_SMIX_CNT(r,N) (2 * (r) * 16 * (N))
-#define GET_STATE_CNT(r) (2 * (r) * 16)
+#define GET_SCRYPT_SZ(r,p) (128 * (r) * (p))
+#define GET_STATE_SZ(r) (128 * (r))
 
-#define SCRYPT_CNT GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
-#define SCRYPT_CNT4 (SCRYPT_CNT / 4)
-#define STATE_CNT GET_STATE_CNT (SCRYPT_R)
-#define STATE_CNT4 (STATE_CNT / 4)
+// _SZ is true sizes as bytes
+#define SCRYPT_SZ GET_SCRYPT_SZ (SCRYPT_R, SCRYPT_P)
+#define STATE_SZ GET_STATE_SZ (SCRYPT_R)
+
+// _CNT is size as whatever /X datatype
+#define SCRYPT_CNT4 (SCRYPT_SZ / 4)
+#define STATE_CNT4 (STATE_SZ / 4)
+
+// this would be uint4, feels more natural than 16
+#define SCRYPT_CNT44 ((SCRYPT_SZ / 4) / 4)
+#define STATE_CNT44 ((STATE_SZ / 4) / 4)
+
+#define SALSA_SZ 64
+#define SALSA_CNT4 (SALSA_SZ / 4)
 
 #define VIDX(bid4,lsz,lid,ySIZE,zSIZE,y,z) (((bid4) * (lsz) * (ySIZE) * (zSIZE)) + ((lid) * (ySIZE) * (zSIZE)) + ((y) * (zSIZE)) + (z))
 
 #if defined IS_CUDA
 inline __device__ uint4 operator &  (const uint4 a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  )); }
 inline __device__ uint4 operator << (const uint4 a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  )); }
 inline __device__ uint4 operator >> (const uint4 a, const u32   b) { return make_uint4 ((a.x >> b  ), (a.y >> b  ), (a.z >> b  ), (a.w >> b  )); }
 inline __device__ uint4 operator +  (const uint4 a, const uint4 b) { return make_uint4 ((a.x +  b.x), (a.y +  b.y), (a.z +  b.z), (a.w +  b.w)); }
 inline __device__ uint4 operator ^  (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^  b.x), (a.y ^  b.y), (a.z ^  b.z), (a.w ^  b.w)); }
 inline __device__ uint4 operator |  (const uint4 a, const uint4 b) { return make_uint4 ((a.x |  b.x), (a.y |  b.y), (a.z |  b.z), (a.w |  b.w)); }
 inline __device__ void  operator ^= (uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
 #endif
 
 #if defined IS_CUDA || defined IS_HIP
-inline __device__ uint4 rotate (const uint4 a, const int n)
+DECLSPEC uint4 operator ^ (const uint4 a, const uint4 b)
 {
   uint4 r;
 
-  r.x = hc_rotl32_S (r.x, n);
-  r.y = hc_rotl32_S (r.y, n);
-  r.z = hc_rotl32_S (r.z, n);
-  r.w = hc_rotl32_S (r.w, n);
+  r.x = a.x ^ b.x;
+  r.y = a.y ^ b.y;
+  r.z = a.z ^ b.z;
+  r.w = a.w ^ b.w;
 
   return r;
 }
 #endif
 
 #endif
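As a quick sanity check of the renamed macros, here are the values they expand to for the classic scrypt parameters R = 8, P = 1 (as used by hash-mode 8900); the arithmetic follows directly from the definitions above:

    /* R = 8, P = 1:
       STATE_SZ    = 128 * 8     = 1024 bytes per BlockMix state
       STATE_CNT4  = 1024 / 4    =  256 u32 elements
       STATE_CNT44 = 1024 / 16   =   64 uint4 elements
       SCRYPT_SZ   = 128 * 8 * 1 = 1024 bytes of PBKDF2 output
       SALSA_CNT4  = 64 / 4      =   16 u32 per Salsa20/8 block */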
OpenCL/inc_platform.cl
@@ -131,7 +131,7 @@ DECLSPEC u32x rotl32 (const u32x a, const int n)
   return __funnelshift_l(a, a, n);
   #endif
 
-  u32x t;
+  u32x t = 0;
 
   #if VECT_SIZE >= 2
   t.s0 = __funnelshift_l(a.s0, a.s0, n);
@@ -171,7 +171,7 @@ DECLSPEC u32x rotr32 (const u32x a, const int n)
   return __funnelshift_r(a, a, n);
   #endif
 
-  u32x t;
+  u32x t = 0;
 
   #if VECT_SIZE >= 2
   t.s0 = __funnelshift_r(a.s0, a.s0, n);
OpenCL/inc_vendor.h
@@ -148,30 +148,20 @@ using namespace metal;
 #define HC_INLINE inline static
 #endif
 
+#if defined IS_AMD && defined IS_GPU
+#define DECLSPEC HC_INLINE
+#elif defined IS_CUDA
+#define DECLSPEC __device__
+#elif defined IS_HIP
+#define DECLSPEC __device__
+#else
+#define DECLSPEC
+#endif
+
 #if defined FIXED_LOCAL_SIZE
 #define HC_ATTR_SEQ FIXED_THREAD_COUNT((FIXED_LOCAL_SIZE))
 #else
-#if defined IS_AMD && defined IS_GPU
-#define HC_ATTR_SEQ
-#define DECLSPEC HC_INLINE
-#elif defined IS_HIP
-#define HC_ATTR_SEQ __launch_bounds__((MAX_THREADS_PER_BLOCK), 0)
-#define DECLSPEC __device__ HC_INLINE
-#elif defined IS_CUDA
-#define HC_ATTR_SEQ
-#define DECLSPEC
-#else
-#define HC_ATTR_SEQ
-#define DECLSPEC
-#endif
+#define HC_ATTR_SEQ
 #endif
 
-#if defined IS_AMD && defined IS_GPU
-#define DECLSPEC HC_INLINE
-#elif defined IS_HIP
-#define DECLSPEC __device__ HC_INLINE
-#else
-#define DECLSPEC
-#endif
-
 /**
OpenCL/m08900-pure.cl
@@ -21,7 +21,7 @@ typedef struct
 #define SCRYPT_TMP_ELEM 1
 #endif
 
-  uint4 P[SCRYPT_TMP_ELEM];
+  u32 P[SCRYPT_TMP_ELEM];
 
 } scrypt_tmp_t;
@@ -31,9 +31,13 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
+  u32 out[SCRYPT_CNT4];
+
+  scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
+
+  scrypt_blockmix_in (out, SCRYPT_SZ);
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -45,27 +49,15 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *X = X_s[lid];
-  #else
-  uint4 X[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -77,29 +69,16 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  uint4 X[STATE_CNT4];
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *T = T_s[lid];
-  #else
-  uint4 T[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+  u32 T[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -108,14 +87,20 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 16);
-
-  const u32 r0 = tmps[gid].P[0].x;
-  const u32 r1 = tmps[gid].P[0].y;
-  const u32 r2 = tmps[gid].P[0].z;
-  const u32 r3 = tmps[gid].P[0].w;
+  u32 x[SCRYPT_CNT4];
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
+
+  scrypt_blockmix_out (x, SCRYPT_SZ);
+
+  u32 out[4];
+
+  scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 16);
+
+  const u32 r0 = out[0];
+  const u32 r1 = out[1];
+  const u32 r2 = out[2];
+  const u32 r3 = out[3];
 
 #define il_pos 0
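For third-party plugins, the m08900 kernels above are the reference the commit message points to. A condensed, hedged sketch of the new per-kernel flow (names taken from this diff; all surrounding kernel boilerplate and bounds checks elided):

    // init: PBKDF2 straight into a private u32 buffer, shuffle once, persist
    u32 out[SCRYPT_CNT4];
    scrypt_pbkdf2_gg (pw, pw_len, salt, salt_len, out, SCRYPT_SZ);
    scrypt_blockmix_in (out, SCRYPT_SZ);

    // loop_prepare / loop: private u32 copies of tmps[gid].P, smix on d_extra*_buf
    u32 X[STATE_CNT4], T[STATE_CNT4];
    scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
    scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

    // comp: undo the shuffle, final PBKDF2 into the digest words
    scrypt_blockmix_out (x, SCRYPT_SZ);
    scrypt_pbkdf2_gp (pw, pw_len, x, SCRYPT_SZ, out, 16);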
OpenCL/m15700-pure.cl
@@ -21,7 +21,7 @@ typedef struct
 #define SCRYPT_TMP_ELEM 1
 #endif
 
-  uint4 P[SCRYPT_TMP_ELEM];
+  u32 P[SCRYPT_TMP_ELEM];
 
 } scrypt_tmp_t;
@@ -167,9 +167,13 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
 
   if (gid >= GID_CNT) return;
 
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
+  u32 out[SCRYPT_CNT4];
+
+  scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
+
+  scrypt_blockmix_in (out, SCRYPT_SZ);
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -181,27 +185,15 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *X = X_s[lid];
-  #else
-  uint4 X[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -213,29 +205,16 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  uint4 X[STATE_CNT4];
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *T = T_s[lid];
-  #else
-  uint4 T[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+  u32 T[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
@@ -244,9 +223,15 @@
 
   if (gid >= GID_CNT) return;
 
-  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
+  u32 x[SCRYPT_CNT4];
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
+
+  scrypt_blockmix_out (x, SCRYPT_SZ);
+
+  u32 out[8];
+
+  scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);
 
   /**
    * keccak
@@ -265,10 +250,10 @@
 
   u32 key[4];
 
-  key[0] = tmps[gid].P[1].x;
-  key[1] = tmps[gid].P[1].y;
-  key[2] = tmps[gid].P[1].z;
-  key[3] = tmps[gid].P[1].w;
+  key[0] = out[4];
+  key[1] = out[5];
+  key[2] = out[6];
+  key[3] = out[7];
 
   u64 st[25];
OpenCL/m22700-pure.cl
@@ -22,7 +22,7 @@ typedef struct
 #define SCRYPT_TMP_ELEM 1
 #endif
 
-  uint4 P[SCRYPT_TMP_ELEM];
+  u32 P[SCRYPT_TMP_ELEM];
 
 } scrypt_tmp_t;
@@ -96,23 +96,18 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
          | ((w[j] << 8) & 0xff00ff00);
   }
 
-  sha256_hmac_ctx_t sha256_hmac_ctx;
-
-  sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
-
-  u32 x0[4] = { 0 };
-  u32 x1[4] = { 0 };
-  u32 x2[4] = { 0 };
-  u32 x3[4] = { 0 };
-
-  x0[0] = MULTIBIT_S0;
-  x0[1] = MULTIBIT_S1;
-
-  sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
-
-  scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
+  u32 s[16] = { 0 };
+
+  s[0] = hc_swap32_S (MULTIBIT_S0);
+  s[1] = hc_swap32_S (MULTIBIT_S1);
+
+  u32 out[SCRYPT_CNT4];
+
+  scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);
+
+  scrypt_blockmix_in (out, SCRYPT_SZ);
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -124,27 +119,15 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *X = X_s[lid];
-  #else
-  uint4 X[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -156,29 +139,16 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  uint4 X[STATE_CNT4];
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *T = T_s[lid];
-  #else
-  uint4 T[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+  u32 T[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -261,28 +231,29 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
          | ((w[j] << 8) & 0xff00ff00);
   }
 
-  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
-
-  sha256_hmac_ctx_t ctx;
-
-  sha256_hmac_init_swap (&ctx, w, w_len);
-
-  sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
+  u32 x[SCRYPT_CNT4];
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
+
+  scrypt_blockmix_out (x, SCRYPT_SZ);
+
+  u32 out[8];
+
+  scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);
 
   // AES256-CBC decrypt with IV from salt buffer (dynamic, alternative 1):
 
   u32 key[8];
 
-  key[0] = tmps[gid].P[0].x;
-  key[1] = tmps[gid].P[0].y;
-  key[2] = tmps[gid].P[0].z;
-  key[3] = tmps[gid].P[0].w;
-  key[4] = tmps[gid].P[1].x;
-  key[5] = tmps[gid].P[1].y;
-  key[6] = tmps[gid].P[1].z;
-  key[7] = tmps[gid].P[1].w;
+  key[0] = out[0];
+  key[1] = out[1];
+  key[2] = out[2];
+  key[3] = out[3];
+  key[4] = out[4];
+  key[5] = out[5];
+  key[6] = out[6];
+  key[7] = out[7];
 
 #define KEYLEN 60
OpenCL/m24000-pure.cl
@@ -22,7 +22,7 @@ typedef struct
 #define SCRYPT_TMP_ELEM 1
 #endif
 
-  uint4 P[SCRYPT_TMP_ELEM];
+  u32 P[SCRYPT_TMP_ELEM];
 
 } scrypt_tmp_t;
@@ -171,17 +171,17 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
 
   if (gid >= GID_CNT) return;
 
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
+  u32 out[SCRYPT_CNT4];
+
+  scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);
+
+  scrypt_blockmix_in (out, SCRYPT_SZ);
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
 {
   /**
    * base
    */
 
   const u64 gid = get_global_id (0);
   const u64 lid = get_local_id (0);
   const u64 lsz = get_local_size (0);
@@ -189,24 +189,15 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
 
   if (gid >= GID_CNT) return;
 
-  // SCRYPT part, init V
-
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  uint4 X[STATE_CNT4];
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
@@ -218,29 +209,16 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
 
   if (gid >= GID_CNT) return;
 
-  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
-  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
-  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
-  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
-
-  uint4 X[STATE_CNT4];
-
-  #ifdef IS_HIP
-  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
-  LOCAL_AS uint4 *T = T_s[lid];
-  #else
-  uint4 T[STATE_CNT4];
-  #endif
-
-  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
-
-  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
-
-  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
-
-  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
-
-  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
+  u32 X[STATE_CNT4];
+  u32 T[STATE_CNT4];
+
+  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];
+
+  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);
+
+  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
 }
 
 KERNEL_FQ void HC_ATTR_SEQ m24000_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
@@ -305,9 +283,15 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, bestcrypt_scrypt_t))
 
   if (gid >= GID_CNT) return;
 
-  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
-
-  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
+  u32 x[SCRYPT_CNT4];
+
+  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];
+
+  scrypt_blockmix_out (x, SCRYPT_SZ);
+
+  u32 out[8];
+
+  scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);
 
   u32 version = esalt_bufs[DIGESTS_OFFSET_HOST].version;
@@ -317,14 +301,14 @@
 
   u32 key[8];
 
-  key[0] = tmps[gid].P[0].x;
-  key[1] = tmps[gid].P[0].y;
-  key[2] = tmps[gid].P[0].z;
-  key[3] = tmps[gid].P[0].w;
-  key[4] = tmps[gid].P[1].x;
-  key[5] = tmps[gid].P[1].y;
-  key[6] = tmps[gid].P[1].z;
-  key[7] = tmps[gid].P[1].w;
+  key[0] = out[0];
+  key[1] = out[1];
+  key[2] = out[2];
+  key[3] = out[3];
+  key[4] = out[4];
+  key[5] = out[5];
+  key[6] = out[6];
+  key[7] = out[7];
 
   if (version == 0x38) //0x38 is char for '8' which is the crypto type passed in position 3 of hash ( $08$ )
   {
@ -19,7 +19,7 @@ typedef struct
|
||||
#define SCRYPT_TMP_ELEM 1
|
||||
#endif
|
||||
|
||||
uint4 P[SCRYPT_TMP_ELEM];
|
||||
u32 P[SCRYPT_TMP_ELEM];
|
||||
|
||||
} scrypt_tmp_t;
|
||||
|
||||
@ -46,23 +46,18 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
| ((w[j] << 8) & 0xff00ff00);
|
||||
}
|
||||
|
||||
sha256_hmac_ctx_t sha256_hmac_ctx;
|
||||
u32 s[16] = { 0 };
|
||||
|
||||
sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
|
||||
  s[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]);
  s[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]);

  u32 x0[4] = { 0 };
  u32 x1[4] = { 0 };
  u32 x2[4] = { 0 };
  u32 x3[4] = { 0 };
  u32 out[SCRYPT_CNT4];

  x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
  x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
  scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);

  sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
  scrypt_blockmix_in (out, SCRYPT_SZ);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);

  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}

KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -74,27 +69,15 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];

  #ifdef IS_HIP
  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *X = X_s[lid];
  #else
  uint4 X[STATE_CNT4];
  #endif
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -106,29 +89,16 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];
  u32 T[STATE_CNT4];

  uint4 X[STATE_CNT4];
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  #ifdef IS_HIP
  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *T = T_s[lid];
  #else
  uint4 T[STATE_CNT4];
  #endif
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -211,28 +181,29 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
           | ((w[j] << 8) & 0xff00ff00);
  }

  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);

  sha256_hmac_ctx_t ctx;
  u32 x[SCRYPT_CNT4];

  sha256_hmac_init_swap (&ctx, w, w_len);
  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];

  sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
  scrypt_blockmix_out (x, SCRYPT_SZ);

  scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
  u32 out[8];

  scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);

  // AES256-CBC decrypt

  u32 key[8];

  key[0] = tmps[gid].P[0].x;
  key[1] = tmps[gid].P[0].y;
  key[2] = tmps[gid].P[0].z;
  key[3] = tmps[gid].P[0].w;
  key[4] = tmps[gid].P[1].x;
  key[5] = tmps[gid].P[1].y;
  key[6] = tmps[gid].P[1].z;
  key[7] = tmps[gid].P[1].w;
  key[0] = out[0];
  key[1] = out[1];
  key[2] = out[2];
  key[3] = out[3];
  key[4] = out[4];
  key[5] = out[5];
  key[6] = out[6];
  key[7] = out[7];

  #define KEYLEN 60
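Taken together, the m27700 hunks above (and the m28200/m29800 hunks below) follow the same refactored flow from inc_hash_scrypt.cl. A minimal sketch of the per-kernel sequence, hedged: the helper semantics are inferred from the call sites, not from the header itself, and V0..V3 stand in for d_extra0_buf..d_extra3_buf.

// sketch only; semantics inferred from the call sites in the hunks above
scrypt_pbkdf2_pp    (w, w_len, s, 8, out, SCRYPT_SZ);           // init: expand pw/salt into P
scrypt_blockmix_in  (out, SCRYPT_SZ);                           // init: reorder P into smix layout
scrypt_smix_init    (X, V0, V1, V2, V3, gid, lid, lsz, bid);    // loop_prepare: fill V[]
scrypt_smix_loop    (X, T, V0, V1, V2, V3, gid, lid, lsz, bid); // loop: data-dependent mixing
scrypt_blockmix_out (x, SCRYPT_SZ);                             // comp: undo the reordering
scrypt_pbkdf2_pp    (w, w_len, x, SCRYPT_SZ, out, 32);          // comp: derive the output key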
@ -23,7 +23,7 @@ typedef struct exodus_tmp
#define SCRYPT_TMP_ELEM 1
#endif

  uint4 P[SCRYPT_TMP_ELEM];
  u32 P[SCRYPT_TMP_ELEM];

} exodus_tmp_t;

@ -41,9 +41,13 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod

  if (gid >= GID_CNT) return;

  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, tmps[gid].P, SCRYPT_CNT * 4);
  u32 out[SCRYPT_CNT4];

  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
  scrypt_pbkdf2_gg (pws[gid].i, pws[gid].pw_len, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len, out, SCRYPT_SZ);

  scrypt_blockmix_in (out, SCRYPT_SZ);

  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}

KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -55,27 +59,15 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];

  #ifdef IS_HIP
  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *X = X_s[lid];
  #else
  uint4 X[STATE_CNT4];
  #endif
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -87,29 +79,16 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];
  u32 T[STATE_CNT4];

  uint4 X[STATE_CNT4];
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  #ifdef IS_HIP
  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *T = T_s[lid];
  #else
  uint4 T[STATE_CNT4];
  #endif
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
@ -171,22 +150,28 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod

  if (gid >= GID_CNT) return;

  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);
  u32 x[SCRYPT_CNT4];

  scrypt_pbkdf2 (pws[gid].i, pws[gid].pw_len, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4, tmps[gid].P, 32);
  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];

  scrypt_blockmix_out (x, SCRYPT_SZ);

  u32 out[8];

  scrypt_pbkdf2_gp (pws[gid].i, pws[gid].pw_len, x, SCRYPT_SZ, out, 32);

  // GCM stuff

  u32 ukey[8];

  ukey[0] = hc_swap32_S (tmps[gid].P[0].x);
  ukey[1] = hc_swap32_S (tmps[gid].P[0].y);
  ukey[2] = hc_swap32_S (tmps[gid].P[0].z);
  ukey[3] = hc_swap32_S (tmps[gid].P[0].w);
  ukey[4] = hc_swap32_S (tmps[gid].P[1].x);
  ukey[5] = hc_swap32_S (tmps[gid].P[1].y);
  ukey[6] = hc_swap32_S (tmps[gid].P[1].z);
  ukey[7] = hc_swap32_S (tmps[gid].P[1].w);
  ukey[0] = hc_swap32_S (out[0]);
  ukey[1] = hc_swap32_S (out[1]);
  ukey[2] = hc_swap32_S (out[2]);
  ukey[3] = hc_swap32_S (out[3]);
  ukey[4] = hc_swap32_S (out[4]);
  ukey[5] = hc_swap32_S (out[5]);
  ukey[6] = hc_swap32_S (out[6]);
  ukey[7] = hc_swap32_S (out[7]);

  u32 key[60] = { 0 };
  u32 subKey[4] = { 0 };
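A short aside on the ukey assignments above: out[] holds the PBKDF2 output as little-endian u32 words, and the per-word swap suggests the GCM key setup in this codebase wants big-endian words. Hedged illustration only:

// hedged illustration: hc_swap32_S reverses the byte order of one word,
// e.g. 0x11223344 -> 0x44332211
u32 ukey0 = hc_swap32_S (out[0]);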
@ -19,7 +19,7 @@ typedef struct
#define SCRYPT_TMP_ELEM 1
#endif

  uint4 P[SCRYPT_TMP_ELEM];
  u32 P[SCRYPT_TMP_ELEM];

} scrypt_tmp_t;

@ -46,23 +46,18 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
           | ((w[j] << 8) & 0xff00ff00);
  }

  sha256_hmac_ctx_t sha256_hmac_ctx;
  u32 s[16] = { 0 };

  sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len);
  s[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]);
  s[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]);

  u32 x0[4] = { 0 };
  u32 x1[4] = { 0 };
  u32 x2[4] = { 0 };
  u32 x3[4] = { 0 };
  u32 out[SCRYPT_CNT4];

  x0[0] = salt_bufs[SALT_POS_HOST].salt_buf[0];
  x0[1] = salt_bufs[SALT_POS_HOST].salt_buf[1];
  scrypt_pbkdf2_pp (w, w_len, s, 8, out, SCRYPT_SZ);

  sha256_hmac_update_64 (&sha256_hmac_ctx, x0, x1, x2, x3, 8);
  scrypt_blockmix_in (out, SCRYPT_SZ);

  scrypt_pbkdf2_body (&sha256_hmac_ctx, tmps[gid].P, SCRYPT_CNT * 4);

  scrypt_blockmix_in (tmps[gid].P, SCRYPT_CNT * 4);
  for (u32 i = 0; i < SCRYPT_CNT4; i++) tmps[gid].P[i] = out[i];
}

KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -74,27 +69,15 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];

  #ifdef IS_HIP
  LOCAL_VK uint4 X_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *X = X_s[lid];
  #else
  uint4 X[STATE_CNT4];
  #endif
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;
  scrypt_smix_init (X, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -106,29 +89,16 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))

  if (gid >= GID_CNT) return;

  GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
  GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
  GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
  GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
  u32 X[STATE_CNT4];
  u32 T[STATE_CNT4];

  uint4 X[STATE_CNT4];
  GLOBAL_AS u32 *P = tmps[gid].P + (SALT_REPEAT * STATE_CNT4);

  #ifdef IS_HIP
  LOCAL_VK uint4 T_s[MAX_THREADS_PER_BLOCK][STATE_CNT4];
  LOCAL_AS uint4 *T = T_s[lid];
  #else
  uint4 T[STATE_CNT4];
  #endif
  for (u32 z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  const u32 P_offset = SALT_REPEAT * STATE_CNT4;
  scrypt_smix_loop (X, T, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, gid, lid, lsz, bid);

  GLOBAL_AS uint4 *P = tmps[gid].P + P_offset;

  for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];

  scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);

  for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
  for (u32 z = 0; z < STATE_CNT4; z++) P[z] = X[z];
}

KERNEL_FQ void HC_ATTR_SEQ m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@ -211,28 +181,29 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
           | ((w[j] << 8) & 0xff00ff00);
  }

  scrypt_blockmix_out (tmps[gid].P, SCRYPT_CNT * 4);

  sha256_hmac_ctx_t ctx;
  u32 x[SCRYPT_CNT4];

  sha256_hmac_init_swap (&ctx, w, w_len);
  for (u32 i = 0; i < SCRYPT_CNT4; i++) x[i] = tmps[gid].P[i];

  sha256_hmac_update_global_swap (&ctx, (GLOBAL_AS const u32 *) tmps[gid].P, SCRYPT_CNT * 4);
  scrypt_blockmix_out (x, SCRYPT_SZ);

  scrypt_pbkdf2_body (&ctx, tmps[gid].P, 16);
  u32 out[8];

  scrypt_pbkdf2_pp (w, w_len, x, SCRYPT_SZ, out, 32);

  // AES256-CBC decrypt

  u32 key[8];

  key[0] = tmps[gid].P[0].x;
  key[1] = tmps[gid].P[0].y;
  key[2] = tmps[gid].P[0].z;
  key[3] = tmps[gid].P[0].w;
  key[4] = tmps[gid].P[1].x;
  key[5] = tmps[gid].P[1].y;
  key[6] = tmps[gid].P[1].z;
  key[7] = tmps[gid].P[1].w;
  key[0] = out[0];
  key[1] = out[1];
  key[2] = out[2];
  key[3] = out[3];
  key[4] = out[4];
  key[5] = out[5];
  key[6] = out[6];
  key[7] = out[7];

  #define KEYLEN 60
@ -117,4 +117,7 @@ char *file_to_buffer (const char *filename);
bool check_file_suffix (const char *file, const char *suffix);
bool remove_file_suffix (char *file, const char *suffix);

int suppress_stderr (void);
void restore_stderr (int saved_fd);

#endif // HC_SHARED_H
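The two new declarations form a save/redirect/restore bracket around noisy calls. A minimal usage sketch, where do_noisy_call() is a hypothetical placeholder; the opencl_test_instruction() hunk below uses the pair in exactly this shape:

int saved_fd = suppress_stderr (); // stderr -> /dev/null (NUL on Windows), returns the saved fd

do_noisy_call ();                  // placeholder for e.g. a driver call that prints warnings

restore_stderr (saved_fd);         // restores stderr; no-op if saved_fd < 0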
@ -486,22 +486,13 @@ static bool opencl_test_instruction (hashcat_ctx_t *hashcat_ctx, cl_context cont
  OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;

  #ifndef DEBUG
  const int fd_stderr = fileno (stderr);
  const int stderr_bak = dup (fd_stderr);
  #ifdef _WIN
  const int tmp = open ("NUL", O_WRONLY);
  #else
  const int tmp = open ("/dev/null", O_WRONLY);
  #endif
  dup2 (tmp, fd_stderr);
  close (tmp);
  int saved_stderr = suppress_stderr ();
  #endif

  const int CL_rc = ocl->clBuildProgram (program, 1, &device, NULL, NULL, NULL);

  #ifndef DEBUG
  dup2 (stderr_bak, fd_stderr);
  close (stderr_bak);
  restore_stderr (saved_stderr);
  #endif

  if (CL_rc != CL_SUCCESS)
@ -8984,8 +8975,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
    nvrtc_options[nvrtc_options_idx++] = "--std=c++14";
  }

  nvrtc_options[nvrtc_options_idx++] = "--restrict";
  nvrtc_options[nvrtc_options_idx++] = "--device-as-default-execution-space";
  //nvrtc_options[nvrtc_options_idx++] = "--restrict";
  nvrtc_options[nvrtc_options_idx++] = "--gpu-architecture";

  hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "compute_%d", (device_param->sm_major * 10) + device_param->sm_minor);
@ -9243,6 +9233,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p

  hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
  hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-architecture=%s", device_param->gcnArchName);
  hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);

  // untested but it should work
  #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
@ -10598,6 +10589,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)

  build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D XM2S(x)=#x ");
  build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D M2S(x)=XM2S(x) ");
  build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D MAX_THREADS_PER_BLOCK=%d ", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);

  #if defined (__APPLE__)
  if (is_apple_silicon () == true)
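On the new hiprtc option: to my understanding, --gpu-max-threads-per-block=n behaves roughly like compiling every kernel with a HIP launch-bounds cap of n, which lets the compiler budget registers for at most n threads per block. A hedged equivalent with a hypothetical kernel (MAX_THREADS_PER_BLOCK here is the same value passed via -D above; the OpenCL define is currently unused):

// hypothetical kernel, sketch only
__global__ __launch_bounds__ (MAX_THREADS_PER_BLOCK)
void k (unsigned int *out)
{
  out[threadIdx.x] = threadIdx.x;
}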
@ -195,9 +195,9 @@ typedef struct
} python_interpreter_t;

#if defined (_WIN) || defined (__CYGWIN__) || defined (__APPLE__)
static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_sp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_sp.py";
#else
static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_mp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_mp.py";
#endif

const char *extract_module_name (const char *path)
@ -227,78 +227,6 @@ const char *extract_module_name (const char *path)
  return module_name;
}

char *load_source (const char *filename)
{
  FILE *fp = fopen (filename, "r");

  if (fp == NULL)
  {
    fprintf (stderr, "%s: %s\n", filename, strerror (errno));

    return NULL;
  }

  fseek (fp, 0, SEEK_END);

  const size_t size = ftell (fp);

  fseek (fp, 0, SEEK_SET);

  char *source = hcmalloc (size + 1);

  if (fread (source, 1, size, fp) != size)
  {
    fprintf (stderr, "%s: %s\n", filename, strerror (errno));

    hcfree (source);

    return NULL;
  }

  source[size] = 0;

  fclose (fp);

  return source;
}

#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif

static int suppress_stderr (void)
{
  int null_fd = open (DEVNULL, O_WRONLY);

  if (null_fd < 0) return -1;

  int saved_fd = dup (fileno (stderr));

  if (saved_fd < 0)
  {
    close (null_fd);

    return -1;
  }

  dup2 (null_fd, fileno (stderr));

  close (null_fd);

  return saved_fd;
}

static void restore_stderr (int saved_fd)
{
  if (saved_fd < 0) return;

  dup2 (saved_fd, fileno (stderr));

  close (saved_fd);
}

static char *expand_pyenv_libpath (const char *prefix, const int maj, const int min)
{
  char *out = NULL;
@ -793,9 +721,14 @@ void *platform_init (user_options_t *user_options)

  python_interpreter->source_filename = (user_options->bridge_parameter1 == NULL) ? DEFAULT_SOURCE_FILENAME : user_options->bridge_parameter1;

  char *source = load_source (python_interpreter->source_filename);
  char *source = file_to_buffer (python_interpreter->source_filename);

  if (source == NULL) return NULL;
  if (source == NULL)
  {
    fprintf (stderr, "ERROR: %s: %s\n\n", python_interpreter->source_filename, strerror (errno));

    return NULL;
  }

  PyObject *code = python->Py_CompileStringExFlags (source, python_interpreter->source_filename, Py_file_input, NULL, -1);
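The replacement pattern in platform_init() above, isolated: file_to_buffer() from shared.c reads the whole file into a NUL-terminated buffer and, judging by the hunk, leaves error reporting to the caller. A minimal sketch (filename is a placeholder):

char *source = file_to_buffer (filename); // same operations load_source() performed

if (source == NULL)
{
  fprintf (stderr, "ERROR: %s: %s\n\n", filename, strerror (errno));

  return NULL;
}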
@ -192,7 +192,7 @@ typedef struct

} python_interpreter_t;

static char *DEFAULT_SOURCE_FILENAME = "./Python/generic_hash_sp.py";
static char *DEFAULT_SOURCE_FILENAME = "Python/generic_hash_sp.py";

const char *extract_module_name (const char *path)
{
@ -221,78 +221,6 @@ const char *extract_module_name (const char *path)
  return module_name;
}

char *load_source (const char *filename)
{
  FILE *fp = fopen (filename, "r");

  if (fp == NULL)
  {
    fprintf (stderr, "%s: %s\n", filename, strerror (errno));

    return NULL;
  }

  fseek (fp, 0, SEEK_END);

  const size_t size = ftell (fp);

  fseek (fp, 0, SEEK_SET);

  char *source = hcmalloc (size + 1);

  if (fread (source, 1, size, fp) != size)
  {
    fprintf (stderr, "%s: %s\n", filename, strerror (errno));

    hcfree (source);

    return NULL;
  }

  source[size] = 0;

  fclose (fp);

  return source;
}

#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif

static int suppress_stderr (void)
{
  int null_fd = open (DEVNULL, O_WRONLY);

  if (null_fd < 0) return -1;

  int saved_fd = dup (fileno (stderr));

  if (saved_fd < 0)
  {
    close (null_fd);

    return -1;
  }

  dup2 (null_fd, fileno (stderr));

  close (null_fd);

  return saved_fd;
}

static void restore_stderr (int saved_fd)
{
  if (saved_fd < 0) return;

  dup2 (saved_fd, fileno (stderr));

  close (saved_fd);
}

static char *expand_pyenv_libpath (const char *prefix, const int maj, const int min)
{
  char *out = NULL;
@ -843,7 +771,7 @@ bool thread_init (MAYBE_UNUSED void *platform_context, MAYBE_UNUSED hc_device_pa
  python->Py_DecRef (path);
  python->Py_DecRef (sys);

  char *source = load_source (python_interpreter->source_filename);
  char *source = file_to_buffer (python_interpreter->source_filename);

  if (source == NULL) return NULL;

@ -1154,9 +1082,14 @@ const char *st_update_hash (MAYBE_UNUSED void *platform_context)
  python->Py_DecRef (path);
  python->Py_DecRef (sys);

  char *source = load_source (python_interpreter->source_filename);
  char *source = file_to_buffer (python_interpreter->source_filename);

  if (source == NULL) return NULL;
  if (source == NULL)
  {
    fprintf (stderr, "ERROR: %s: %s\n\n", python_interpreter->source_filename, strerror (errno));

    return NULL;
  }

  PyObject *code = python->Py_CompileStringExFlags (source, python_interpreter->source_filename, Py_file_input, NULL, -1);

@ -1218,7 +1151,7 @@ const char *st_update_pass (MAYBE_UNUSED void *platform_context)

  // this is ugly to load that entire thing just to get that one variable

  char *source = load_source (python_interpreter->source_filename);
  char *source = file_to_buffer (python_interpreter->source_filename);

  if (source == NULL) return NULL;
@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-MD5-96/HMAC-SHA1-96";
static const u64 KERN_TYPE = 25000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
                           | OPTS_TYPE_PT_GENERATE_LE
                           | OPTS_TYPE_MP_MULTI_DISABLE;
                           | OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$0$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
@ -118,6 +117,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -337,11 +354,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
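Rough arithmetic behind the new caps, hedged since the exact bookkeeping is hashcat-internal: the tmps[] allocation scales with every launch knob at once, so bounding kernel_accel and kernel_threads bounds the allocation.

// rough estimate, assuming one tmps entry per work-item:
u64 work_items = (u64) device_processors * kernel_threads * kernel_accel;
u64 tmps_bytes = work_items * tmp_size; // the large tmp_size is the problem here
// hashcat still auto-tunes kernel_accel down if memory remains short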
@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-MD5-96";
static const u64 KERN_TYPE = 25100;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
                           | OPTS_TYPE_PT_GENERATE_LE
                           | OPTS_TYPE_MP_MULTI_DISABLE;
                           | OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$1$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90";
@ -114,6 +113,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -318,11 +335,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -23,8 +23,7 @@ static const char *HASH_NAME = "SNMPv3 HMAC-SHA1-96";
static const u64 KERN_TYPE = 25200;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
                           | OPTS_TYPE_PT_GENERATE_LE
                           | OPTS_TYPE_MP_MULTI_DISABLE;
                           | OPTS_TYPE_PT_GENERATE_LE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";
static const char *ST_HASH = "$SNMPv3$2$45889431$30818f02010330110204371780f3020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f534841040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204073557d50201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$81f14f1930589f26f6755f6b";
@ -114,6 +113,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -329,11 +346,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -115,6 +115,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -332,11 +350,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -115,6 +115,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of 64
@ -340,11 +358,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -149,6 +149,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of SNMPV3_MAX_PW_LENGTH
@ -383,11 +401,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = module_jit_build_options;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -149,6 +149,24 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
  return tmp_size;
}

u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_accel_max = 256;

  return kernel_accel_max;
}

u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // due to the large tmps structure

  const u32 kernel_threads_max = 32;

  return kernel_threads_max;
}

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // we need to fix iteration count to guarantee the loop count is a multiple of SNMPV3_MAX_PW_LENGTH
@ -379,11 +397,11 @@ void module_init (module_ctx_t *module_ctx)
  module_ctx->module_hook_size = MODULE_DEFAULT;
  module_ctx->module_jit_build_options = module_jit_build_options;
  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max = module_kernel_accel_max;
  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max = module_kernel_loops_max;
  module_ctx->module_kernel_loops_min = module_kernel_loops_min;
  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max = module_kernel_threads_max;
  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
  module_ctx->module_kern_type = module_kern_type;
  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
@ -54,10 +54,10 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t

  const u32 scrypt_N = (hashes->salts_buf[0].scrypt_N == 0) ? hashes->st_salts_buf[0].scrypt_N : hashes->salts_buf[0].scrypt_N;
  const u32 scrypt_r = (hashes->salts_buf[0].scrypt_r == 0) ? hashes->st_salts_buf[0].scrypt_r : hashes->salts_buf[0].scrypt_r;
  const u32 scrypt_p = (hashes->salts_buf[0].scrypt_p == 0) ? hashes->st_salts_buf[0].scrypt_p : hashes->salts_buf[0].scrypt_p;
  //const u32 scrypt_p = (hashes->salts_buf[0].scrypt_p == 0) ? hashes->st_salts_buf[0].scrypt_p : hashes->salts_buf[0].scrypt_p;

  const u64 size_per_accel = (128ULL * scrypt_r * scrypt_N * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));
  const u64 state_per_accel = (128ULL * scrypt_r * scrypt_p * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));
  //const u64 state_per_accel = (128ULL * scrypt_r * scrypt_p * scrypt_exptected_threads (hashconfig, user_options, user_options_extra, device_param));

  int lines_sz = 4096;
  char *lines_buf = hcmalloc (lines_sz);
@ -65,7 +65,7 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t

  const u32 device_processors = device_param->device_processors;

  const u32 device_local_mem_size = device_param->device_local_mem_size;
  //const u32 device_local_mem_size = device_param->device_local_mem_size;

  const u64 fixed_mem = (512 * 1024 * 1024); // some storage we need for pws[], tmps[], and others

@ -160,21 +160,6 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t

        break;
    }

    if (device_param->is_hip == true)
    {
      // we use some local memory to speed up things, so
      // we need to make sure there's enough local memory available

      u64 state_per_accel_tmto = state_per_accel >> tmto;

      while (state_per_accel_tmto > device_local_mem_size)
      {
        tmto++;

        state_per_accel_tmto = state_per_accel >> tmto;
      }
    }
  }
}

@ -301,7 +286,7 @@ char *scrypt_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconf
    scrypt_r,
    scrypt_p,
    tmto,
    tmp_size / 16);
    tmp_size / 4);

  return jit_build_options;
}
src/shared.c
@ -1582,3 +1582,40 @@ bool remove_file_suffix (char *file, const char *suffix)
  return true;
}

#if defined (_WIN)
#define DEVNULL "NUL"
#else
#define DEVNULL "/dev/null"
#endif

int suppress_stderr (void)
{
  int null_fd = open (DEVNULL, O_WRONLY);

  if (null_fd < 0) return -1;

  int saved_fd = dup (fileno (stderr));

  if (saved_fd < 0)
  {
    close (null_fd);

    return -1;
  }

  dup2 (null_fd, fileno (stderr));

  close (null_fd);

  return saved_fd;
}

void restore_stderr (int saved_fd)
{
  if (saved_fd < 0) return;

  dup2 (saved_fd, fileno (stderr));

  close (saved_fd);
}
@ -34,7 +34,7 @@ if ($cpu_benchmark == 1)
}
else
{
  system ("rocm-smi --resetprofile --resetclocks --resetfans");
  #system ("rocm-smi --resetprofile --resetclocks --resetfans");
  system ("rocm-smi --setfan 100% --setperflevel high");

  system ("nvidia-settings -a GPUPowerMizerMode=1 -a GPUFanControlState=1 -a GPUTargetFanSpeed=100");