Fix CUDA num_elements: use CEILDIV (num_elements, kernel_threads) instead of round_up_multiple_64 for kernel launches

pull/2022/head
Jens Steube 5 years ago
parent 027af75a39
commit 3a3df091c7

@@ -2883,7 +2883,7 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 const u64 kernel_threads = device_param->kernel_wgs_atinit;
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 CUfunction function = device_param->cuda_function_atinit;
@@ -2913,7 +2913,7 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 u64 num_elements = num16d;
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 CUfunction function = device_param->cuda_function_memset;
@@ -3111,12 +3111,14 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 case KERN_RUN_AUX4: local_mem_size = device_param->kernel_local_mem_size_aux4; break;
 }
+/*
 if (local_mem_size)
 {
 const u32 max_threads_possible = (device_param->device_local_mem_size - 240) / local_mem_size;
 kernel_threads = MIN (kernel_threads, max_threads_possible);
 }
+*/
 CUfunction cuda_function = NULL;
@@ -3139,7 +3141,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 }
 }
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
 {
@@ -3147,7 +3149,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 if (rc_cuEventRecord1 == -1) return -1;
-const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements / 32, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
+const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL);
 if (rc_cuLaunchKernel == -1) return -1;
@@ -3179,8 +3181,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 }
 }
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
 const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream);
 if (rc_cuEventRecord1 == -1) return -1;
@@ -3472,7 +3472,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 break;
 }
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, cuda_args, NULL);
@@ -3597,7 +3597,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 if (device_param->is_cuda == true)
 {
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 CUfunction cuda_function = device_param->cuda_function_amp;
@@ -3651,7 +3651,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 if (device_param->is_cuda == true)
 {
-num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+num_elements = CEILDIV (num_elements, kernel_threads);
 CUfunction cuda_function = device_param->cuda_function_decompress;

Loading…
Cancel
Save