1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-27 00:48:14 +00:00

Fix CUDA num_elements

This commit is contained in:
Jens Steube 2019-05-08 22:42:52 +02:00
parent 027af75a39
commit 3a3df091c7

View File

@@ -2883,7 +2883,7 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
const u64 kernel_threads = device_param->kernel_wgs_atinit; const u64 kernel_threads = device_param->kernel_wgs_atinit;
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
CUfunction function = device_param->cuda_function_atinit; CUfunction function = device_param->cuda_function_atinit;
@@ -2913,7 +2913,7 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
u64 num_elements = num16d; u64 num_elements = num16d;
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
CUfunction function = device_param->cuda_function_memset; CUfunction function = device_param->cuda_function_memset;
@@ -3111,12 +3111,14 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
case KERN_RUN_AUX4: local_mem_size = device_param->kernel_local_mem_size_aux4; break; case KERN_RUN_AUX4: local_mem_size = device_param->kernel_local_mem_size_aux4; break;
} }
/*
if (local_mem_size) if (local_mem_size)
{ {
const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size; const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size;
kernel_threads = MIN (kernel_threads, max_threads_possible); kernel_threads = MIN (kernel_threads, max_threads_possible);
} }
*/
CUfunction cuda_function = NULL; CUfunction cuda_function = NULL;
@@ -3139,7 +3141,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
} }
} }
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF)) if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{ {
@@ -3147,7 +3149,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
if (rc_cuEventRecord1 == -1) return -1; if (rc_cuEventRecord1 == -1) return -1;
const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / 32, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL); const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
if (rc_cuLaunchKernel == -1) return -1; if (rc_cuLaunchKernel == -1) return -1;
@@ -3179,8 +3181,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
} }
} }
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream); const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
if (rc_cuEventRecord1 == -1) return -1; if (rc_cuEventRecord1 == -1) return -1;
@@ -3472,7 +3472,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
break; break;
} }
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, cuda_args, NULL); const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, cuda_args, NULL);
@@ -3597,7 +3597,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if (device_param->is_cuda == true) if (device_param->is_cuda == true)
{ {
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
CUfunction cuda_function = device_param->cuda_function_amp; CUfunction cuda_function = device_param->cuda_function_amp;
@@ -3651,7 +3651,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
if (device_param->is_cuda == true) if (device_param->is_cuda == true)
{ {
num_elements = round_up_multiple_64 (num_elements, kernel_threads); num_elements = CEILDIV (num_elements, kernel_threads);
CUfunction cuda_function = device_param->cuda_function_decompress; CUfunction cuda_function = device_param->cuda_function_decompress;