mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-03 12:21:07 +00:00
Minimal psafe2 increase and autotune fix
This commit is contained in:
parent
e46aa7103a
commit
b5cb29ad1c
@ -320,16 +320,16 @@ __constant u32 c_pbox[18] =
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#ifdef IS_NV
|
||||||
#define BF_ROUND(L,R,N) \
|
#define BF_ROUND(L,R,N) \
|
||||||
{ \
|
{ \
|
||||||
u32 tmp; \
|
u32 tmp; \
|
||||||
\
|
\
|
||||||
tmp = S0[__bfe ((L), 24, 8)]; \
|
tmp = S0[__bfe_S ((L), 24, 8)]; \
|
||||||
tmp += S1[__bfe ((L), 16, 8)]; \
|
tmp += S1[__bfe_S ((L), 16, 8)]; \
|
||||||
tmp ^= S2[__bfe ((L), 8, 8)]; \
|
tmp ^= S2[__bfe_S ((L), 8, 8)]; \
|
||||||
tmp += S3[__bfe ((L), 0, 8)]; \
|
tmp += S3[__bfe_S ((L), 0, 8)]; \
|
||||||
\
|
\
|
||||||
(R) ^= tmp ^ P[(N)]; \
|
(R) ^= tmp ^ P[(N)]; \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -747,6 +747,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__glo
|
|||||||
|
|
||||||
u32 P[18];
|
u32 P[18];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
for (u32 i = 0; i < 18; i++)
|
for (u32 i = 0; i < 18; i++)
|
||||||
{
|
{
|
||||||
P[i] = tmps[gid].P[i];
|
P[i] = tmps[gid].P[i];
|
||||||
@ -762,6 +763,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__glo
|
|||||||
__local u32 *S2 = S2_all[lid];
|
__local u32 *S2 = S2_all[lid];
|
||||||
__local u32 *S3 = S3_all[lid];
|
__local u32 *S3 = S3_all[lid];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
for (u32 i = 0; i < 256; i++)
|
for (u32 i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
S0[i] = tmps[gid].S0[i];
|
S0[i] = tmps[gid].S0[i];
|
||||||
|
@ -257,7 +257,12 @@ inline u64x rotl64 (const u64x a, const u32 n)
|
|||||||
return rotr64 (a, 64 - n);
|
return rotr64 (a, 64 - n);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u32 __bfe (const u32 a, const u32 b, const u32 c)
|
inline u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||||
|
{
|
||||||
|
return amd_bfe (a, b, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||||
{
|
{
|
||||||
return amd_bfe (a, b, c);
|
return amd_bfe (a, b, c);
|
||||||
}
|
}
|
||||||
|
@ -2900,22 +2900,25 @@ static void autotune (hc_device_param_t *device_param)
|
|||||||
|
|
||||||
// balancing the workload turns out to be very efficient
|
// balancing the workload turns out to be very efficient
|
||||||
|
|
||||||
const u32 kernel_power_balance = kernel_accel * kernel_loops;
|
if (kernel_loops_min != kernel_loops_max)
|
||||||
|
|
||||||
u32 sqrtv;
|
|
||||||
|
|
||||||
for (sqrtv = 1; sqrtv < 0x100000; sqrtv++)
|
|
||||||
{
|
{
|
||||||
if ((sqrtv * sqrtv) >= kernel_power_balance) break;
|
const u32 kernel_power_balance = kernel_accel * kernel_loops;
|
||||||
}
|
|
||||||
|
|
||||||
const u32 kernel_accel_try = sqrtv;
|
u32 sqrtv;
|
||||||
const u32 kernel_loops_try = sqrtv;
|
|
||||||
|
|
||||||
if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min))
|
for (sqrtv = 1; sqrtv < 0x100000; sqrtv++)
|
||||||
{
|
{
|
||||||
kernel_accel = kernel_accel_try;
|
if ((sqrtv * sqrtv) >= kernel_power_balance) break;
|
||||||
kernel_loops = kernel_loops_try;
|
}
|
||||||
|
|
||||||
|
const u32 kernel_accel_try = sqrtv;
|
||||||
|
const u32 kernel_loops_try = sqrtv;
|
||||||
|
|
||||||
|
if ((kernel_accel_try <= kernel_accel_max) && (kernel_loops_try >= kernel_loops_min))
|
||||||
|
{
|
||||||
|
kernel_accel = kernel_accel_try;
|
||||||
|
kernel_loops = kernel_loops_try;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset fake words
|
// reset fake words
|
||||||
|
Loading…
Reference in New Issue
Block a user