mirror of
https://github.com/hashcat/hashcat.git
synced 2024-12-22 22:58:30 +00:00
Workaround for -m 22100 on NVIDIA
This commit is contained in:
parent
a6cf7caf4a
commit
24094793da
@ -265,11 +265,35 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
||||
{
|
||||
#ifdef REAL_SHM
|
||||
|
||||
for (u32 i = lid; i < FIXED_ITER_INCR; i += lsz)
|
||||
/**
|
||||
* On NVIDIA, __syncthreads() does not work as expected when called from inside a loop.
|
||||
* This is a unique situation across all hashcat kernels so far.
|
||||
* From CUDA manual:
|
||||
* __syncthreads() is allowed in conditional code but only if the conditional evaluates identically across the entire thread block,
|
||||
* otherwise the code execution is likely to hang or produce unintended side effects.
|
||||
* NVIDIA OpenCL runtime is also affected, but other OpenCL runtimes work as they should.
|
||||
* One workaround is to disable shared memory access. Speed drop is around 4%.
|
||||
* Another workaround is to let only a single thread do all the work while all other threads wait for it to finish. Speed drop is around 0.05%.
|
||||
|
||||
// original code
|
||||
for (int i = lid; i < FIXED_ITER_INCR; i += lsz)
|
||||
{
|
||||
for (int j = 0; j < 48; j++) // first 16 set to register
|
||||
{
|
||||
s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t + i][j];
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
if (lid == 0)
|
||||
{
|
||||
for (u32 j = 0; j < 48; j++) // first 16 set to register
|
||||
for (int i = 0; i < FIXED_ITER_INCR; i++)
|
||||
{
|
||||
s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t + i][j];
|
||||
for (int j = 0; j < 48; j++) // first 16 set to register
|
||||
{
|
||||
s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t + i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user