mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-22 08:08:10 +00:00
OpenCL Runtime: Allow the kernel to access post-48k shared memory region on CUDA. Requires both module and kernel preparation
This commit is contained in:
parent
1fc37c25f9
commit
aef53f7e10
@ -356,6 +356,10 @@ CONSTANT_VK u32a c_pbox[18] =
|
||||
L ^= P[17]; \
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_LOCAL
|
||||
extern __shared__ u32 lm[];
|
||||
#endif
|
||||
|
||||
DECLSPEC void expand_key (u32 *E, u32 *W, const int len)
|
||||
{
|
||||
u8 *E_ptr = (u8 *) E;
|
||||
@ -456,6 +460,12 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS
|
||||
P[i] = c_pbox[i];
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_LOCAL
|
||||
u32 *S0 = lm + (lid * 1024) + 0;
|
||||
u32 *S1 = lm + (lid * 1024) + 256;
|
||||
u32 *S2 = lm + (lid * 1024) + 512;
|
||||
u32 *S3 = lm + (lid * 1024) + 768;
|
||||
#else
|
||||
LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
@ -465,6 +475,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS
|
||||
LOCAL_AS u32 *S1 = S1_all[lid];
|
||||
LOCAL_AS u32 *S2 = S2_all[lid];
|
||||
LOCAL_AS u32 *S3 = S3_all[lid];
|
||||
#endif
|
||||
|
||||
for (u32 i = 0; i < 256; i++)
|
||||
{
|
||||
@ -614,6 +625,12 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS
|
||||
P[i] = tmps[gid].P[i];
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_LOCAL
|
||||
u32 *S0 = lm + (lid * 1024) + 0;
|
||||
u32 *S1 = lm + (lid * 1024) + 256;
|
||||
u32 *S2 = lm + (lid * 1024) + 512;
|
||||
u32 *S3 = lm + (lid * 1024) + 768;
|
||||
#else
|
||||
LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
@ -623,6 +640,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS
|
||||
LOCAL_AS u32 *S1 = S1_all[lid];
|
||||
LOCAL_AS u32 *S2 = S2_all[lid];
|
||||
LOCAL_AS u32 *S3 = S3_all[lid];
|
||||
#endif
|
||||
|
||||
for (u32 i = 0; i < 256; i++)
|
||||
{
|
||||
@ -799,6 +817,12 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_comp (KERN_ATTR_TMPS
|
||||
P[i] = tmps[gid].P[i];
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_LOCAL
|
||||
u32 *S0 = lm + (lid * 1024) + 0;
|
||||
u32 *S1 = lm + (lid * 1024) + 256;
|
||||
u32 *S2 = lm + (lid * 1024) + 512;
|
||||
u32 *S3 = lm + (lid * 1024) + 768;
|
||||
#else
|
||||
LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
@ -808,6 +832,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_comp (KERN_ATTR_TMPS
|
||||
LOCAL_AS u32 *S1 = S1_all[lid];
|
||||
LOCAL_AS u32 *S2 = S2_all[lid];
|
||||
LOCAL_AS u32 *S3 = S3_all[lid];
|
||||
#endif
|
||||
|
||||
for (u32 i = 0; i < 256; i++)
|
||||
{
|
||||
|
@ -115,6 +115,7 @@
|
||||
- OpenCL Runtime: Workaround JiT compiler error on ROCm 2.3 driver if the 'inline' keyword is used in function declaration
|
||||
- OpenCL Runtime: Workaround memory allocation error on AMD driver on Windows leading to CL_MEM_OBJECT_ALLOCATION_FAILURE
|
||||
- OpenCL Runtime: Workaround ROCm OpenCL driver problem trying to write temporary file into readonly folder by setting TMPDIR
|
||||
- OpenCL Runtime: Allow the kernel to access post-48k shared memory region on CUDA. Requires both module and kernel preparation
|
||||
- Startup Checks: Improved the pidfile check: Do not just check for existing PID but also check executable filename
|
||||
- Startup Checks: Prevent the user to modify options which are overwritten automatically in benchmark mode
|
||||
- Startup Screen: Add extra warning when using --force
|
||||
|
@ -1132,6 +1132,27 @@ typedef struct hc_device_param
|
||||
u64 kernel_local_mem_size_aux3;
|
||||
u64 kernel_local_mem_size_aux4;
|
||||
|
||||
u64 kernel_dynamic_local_mem_size1;
|
||||
u64 kernel_dynamic_local_mem_size12;
|
||||
u64 kernel_dynamic_local_mem_size2;
|
||||
u64 kernel_dynamic_local_mem_size23;
|
||||
u64 kernel_dynamic_local_mem_size3;
|
||||
u64 kernel_dynamic_local_mem_size4;
|
||||
u64 kernel_dynamic_local_mem_size_init2;
|
||||
u64 kernel_dynamic_local_mem_size_loop2;
|
||||
u64 kernel_dynamic_local_mem_size_mp;
|
||||
u64 kernel_dynamic_local_mem_size_mp_l;
|
||||
u64 kernel_dynamic_local_mem_size_mp_r;
|
||||
u64 kernel_dynamic_local_mem_size_amp;
|
||||
u64 kernel_dynamic_local_mem_size_tm;
|
||||
u64 kernel_dynamic_local_mem_size_memset;
|
||||
u64 kernel_dynamic_local_mem_size_atinit;
|
||||
u64 kernel_dynamic_local_mem_size_decompress;
|
||||
u64 kernel_dynamic_local_mem_size_aux1;
|
||||
u64 kernel_dynamic_local_mem_size_aux2;
|
||||
u64 kernel_dynamic_local_mem_size_aux3;
|
||||
u64 kernel_dynamic_local_mem_size_aux4;
|
||||
|
||||
u32 kernel_accel;
|
||||
u32 kernel_accel_prev;
|
||||
u32 kernel_accel_min;
|
||||
|
598
src/backend.c
598
src/backend.c
File diff suppressed because it is too large
Load Diff
@ -91,6 +91,8 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
fixed_local_size = 1;
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -120,15 +122,37 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
{
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
}
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
// using kernel_dynamic_local_mem_size_memset is a bit hackish.
|
||||
// we had to brute-force this value out of an already loaded CUDA function.
|
||||
// there's no official way to query for this value.
|
||||
|
||||
fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096;
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user