Revert due to module_03200.c, module_25600.c and module_25800.c using device_param->kernel_dynamic_local_mem_size_memset

pull/2894/head
Jukka Ojanen 3 years ago
parent 8b590f651b
commit d7cc8d7cd2

@ -106,6 +106,33 @@ KERNEL_FQ void gpu_decompress (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_c
pws_buf[gid] = pw;
}
KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_max)
{
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
uint4 r;
#if defined IS_NATIVE
r = value;
#elif defined IS_OPENCL
r = (uint4) (value);
#elif defined IS_CUDA
r.x = value;
r.y = value;
r.z = value;
r.w = value;
#elif defined IS_HIP
r.x = value;
r.y = value;
r.z = value;
r.w = value;
#endif
buf[gid] = r;
}
KERNEL_FQ void gpu_bzero(GLOBAL_AS uint4* buf, const u64 gid_max)
{
const u64 gid = get_global_id(0);

@ -1139,6 +1139,7 @@ typedef struct hc_device_param
u32 kernel_wgs_mp_r;
u32 kernel_wgs_amp;
u32 kernel_wgs_tm;
u32 kernel_wgs_memset;
u32 kernel_wgs_bzero;
u32 kernel_wgs_atinit;
u32 kernel_wgs_utf8toutf16le;
@ -1164,6 +1165,7 @@ typedef struct hc_device_param
u32 kernel_preferred_wgs_multiple_mp_r;
u32 kernel_preferred_wgs_multiple_amp;
u32 kernel_preferred_wgs_multiple_tm;
u32 kernel_preferred_wgs_multiple_memset;
u32 kernel_preferred_wgs_multiple_bzero;
u32 kernel_preferred_wgs_multiple_atinit;
u32 kernel_preferred_wgs_multiple_utf8toutf16le;
@ -1189,6 +1191,7 @@ typedef struct hc_device_param
u64 kernel_local_mem_size_mp_r;
u64 kernel_local_mem_size_amp;
u64 kernel_local_mem_size_tm;
u64 kernel_local_mem_size_memset;
u64 kernel_local_mem_size_bzero;
u64 kernel_local_mem_size_atinit;
u64 kernel_local_mem_size_utf8toutf16le;
@ -1214,6 +1217,7 @@ typedef struct hc_device_param
u64 kernel_dynamic_local_mem_size_mp_r;
u64 kernel_dynamic_local_mem_size_amp;
u64 kernel_dynamic_local_mem_size_tm;
u64 kernel_dynamic_local_mem_size_memset;
u64 kernel_dynamic_local_mem_size_bzero;
u64 kernel_dynamic_local_mem_size_atinit;
u64 kernel_dynamic_local_mem_size_utf8toutf16le;
@ -1375,6 +1379,7 @@ typedef struct hc_device_param
void *kernel_params_mp_l[PARAMCNT];
void *kernel_params_amp[PARAMCNT];
void *kernel_params_tm[PARAMCNT];
void *kernel_params_memset[PARAMCNT];
void *kernel_params_bzero[PARAMCNT];
void *kernel_params_atinit[PARAMCNT];
void *kernel_params_utf8toutf16le[PARAMCNT];
@ -1395,6 +1400,9 @@ typedef struct hc_device_param
u32 kernel_params_amp_buf32[PARAMCNT];
u64 kernel_params_amp_buf64[PARAMCNT];
u32 kernel_params_memset_buf32[PARAMCNT];
u64 kernel_params_memset_buf64[PARAMCNT];
u32 kernel_params_bzero_buf32[PARAMCNT];
u64 kernel_params_bzero_buf64[PARAMCNT];
@ -1442,6 +1450,7 @@ typedef struct hc_device_param
CUfunction cuda_function_mp_r;
CUfunction cuda_function_amp;
CUfunction cuda_function_tm;
CUfunction cuda_function_memset;
CUfunction cuda_function_bzero;
CUfunction cuda_function_atinit;
CUfunction cuda_function_utf8toutf16le;
@ -1523,6 +1532,7 @@ typedef struct hc_device_param
HIPfunction hip_function_mp_r;
HIPfunction hip_function_amp;
HIPfunction hip_function_tm;
HIPfunction hip_function_memset;
HIPfunction hip_function_bzero;
HIPfunction hip_function_atinit;
HIPfunction hip_function_utf8toutf16le;
@ -1608,6 +1618,7 @@ typedef struct hc_device_param
cl_kernel opencl_kernel_mp_r;
cl_kernel opencl_kernel_amp;
cl_kernel opencl_kernel_tm;
cl_kernel opencl_kernel_memset;
cl_kernel opencl_kernel_bzero;
cl_kernel opencl_kernel_atinit;
cl_kernel opencl_kernel_utf8toutf16le;

@ -5471,6 +5471,18 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
dynamic_shared_mem = 0;
}
//if (device_param->is_cuda == true)
//{
//if ((device_param->kernel_dynamic_local_mem_size_memset % device_param->device_local_mem_size) == 0)
//{
// this is the case Compute Capability 7.5
// there is also Compute Capability 7.0 which offers a larger dynamic local size access
// however, if it's an exact multiple the driver can optimize this for us more efficient
//dynamic_shared_mem = 0;
//}
//}
kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
device_param->kernel_params_buf64[35] = pws_pos;
@ -11738,6 +11750,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (device_param->is_cuda == true)
{
// GPU memset
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_memset, device_param->cuda_module_shared, "gpu_memset") == -1) return -1;
if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_wgs_memset) == -1) return -1;
if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset;
device_param->kernel_preferred_wgs_multiple_memset = device_param->cuda_warp_size;
// GPU bzero
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_bzero, device_param->cuda_module_shared, "gpu_bzero") == -1) return -1;
@ -11792,6 +11816,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (device_param->is_hip == true)
{
// GPU memset
if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) return -1;
if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1;
if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
device_param->kernel_dynamic_local_mem_size_memset = device_param->device_local_mem_size - device_param->kernel_local_mem_size_memset;
device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size;
// GPU bzero
if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_bzero, device_param->hip_module_shared, "gpu_bzero") == -1) return -1;
@ -11846,6 +11882,18 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (device_param->is_opencl == true)
{
// GPU memset
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_memset", &device_param->opencl_kernel_memset) == -1) return -1;
if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_wgs_memset) == -1) return -1;
if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1;
if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1;
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_memset, &device_param->kernel_preferred_wgs_multiple_memset) == -1) return -1;
// GPU bzero
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_bzero", &device_param->opencl_kernel_bzero) == -1) return -1;
@ -12792,6 +12840,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
}
}
device_param->kernel_params_memset_buf32[1] = 0; // value
device_param->kernel_params_memset_buf64[2] = 0; // gid_max
device_param->kernel_params_memset[0] = NULL;
device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1];
device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2];
device_param->kernel_params_bzero_buf64[1] = 0; // gid_max
device_param->kernel_params_bzero[0] = NULL;
@ -15329,6 +15384,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->cuda_function_mp_r = NULL;
device_param->cuda_function_tm = NULL;
device_param->cuda_function_amp = NULL;
device_param->cuda_function_memset = NULL;
device_param->cuda_function_bzero = NULL;
device_param->cuda_function_atinit = NULL;
device_param->cuda_function_utf8toutf16le = NULL;
@ -15457,6 +15513,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->hip_function_mp_r = NULL;
device_param->hip_function_tm = NULL;
device_param->hip_function_amp = NULL;
device_param->hip_function_memset = NULL;
device_param->hip_function_bzero = NULL;
device_param->hip_function_atinit = NULL;
device_param->hip_function_utf8toutf16le = NULL;
@ -15535,6 +15592,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->opencl_kernel_mp_r) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp_r);
if (device_param->opencl_kernel_tm) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_tm);
if (device_param->opencl_kernel_amp) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_amp);
if (device_param->opencl_kernel_memset) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_memset);
if (device_param->opencl_kernel_bzero) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_bzero);
if (device_param->opencl_kernel_atinit) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_atinit);
if (device_param->opencl_kernel_utf8toutf16le) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_utf8toutf16le);
@ -15605,6 +15663,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->opencl_kernel_mp_r = NULL;
device_param->opencl_kernel_tm = NULL;
device_param->opencl_kernel_amp = NULL;
device_param->opencl_kernel_memset = NULL;
device_param->opencl_kernel_bzero = NULL;
device_param->opencl_kernel_atinit = NULL;
device_param->opencl_kernel_utf8toutf16le = NULL;

Loading…
Cancel
Save