From ec9925f3b1b1cf5f6b225bebff9c1716ce47e6b9 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 4 May 2019 21:52:00 +0200 Subject: [PATCH] Warnings self-check and autotune with CUDA --- OpenCL/inc_platform.cl | 2 +- include/ext_cuda.h | 18 + include/types.h | 4 + src/autotune.c | 86 ++++- src/backend.c | 769 +++++++++++++++++++++++------------------ src/selftest.c | 293 +++++++++++++--- 6 files changed, 767 insertions(+), 405 deletions(-) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index ceb12a4f1..1dc643173 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -30,7 +30,7 @@ DECLSPEC u32 atomic_or (u32 *p, u32 val) DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))) { - return blockDim.x * blockIdx.x + threadIdx.x; + return (blockIdx.x * blockDim.x) + threadIdx.x; } DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))) diff --git a/include/ext_cuda.h b/include/ext_cuda.h index f48cca490..eb8967f09 100644 --- a/include/ext_cuda.h +++ b/include/ext_cuda.h @@ -933,6 +933,24 @@ typedef enum CUctx_flags_enum { CU_CTX_FLAGS_MASK = 0x1f } CUctx_flags; +/** + * Stream creation flags + */ +typedef enum CUstream_flags_enum { + CU_STREAM_DEFAULT = 0x0, /**< Default stream flag */ + CU_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */ +} CUstream_flags; + +/** + * Event creation flags + */ +typedef enum CUevent_flags_enum { + CU_EVENT_DEFAULT = 0x0, /**< Default event flag */ + CU_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ + CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ + CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set */ +} CUevent_flags; + #ifdef _WIN32 #define CUDAAPI __stdcall #else diff --git a/include/types.h b/include/types.h index 5ff50d5d4..76f8910a7 100644 --- a/include/types.h +++ b/include/types.h @@ -1248,6 +1248,10 @@ typedef struct hc_device_param CUdevice cuda_device; CUcontext cuda_context; + CUstream cuda_stream; + + CUevent cuda_event1; + CUevent cuda_event2; CUmodule cuda_module; CUmodule cuda_module_mp; diff --git a/src/autotune.c b/src/autotune.c index 80d438bd7..d0b99f59a 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -104,10 +104,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max; int CL_rc; + int CU_rc; - CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max); + if (device_param->is_cuda == true) + { + CU_rc = run_cuda_kernel_atinit (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, kernel_power_max); + + if (CU_rc == -1) return -1; + } - if (CL_rc == -1) return -1; + if (device_param->is_opencl == true) + { + CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max); + + if (CL_rc == -1) return -1; + } if (user_options->slow_candidates == true) { @@ -118,9 +129,19 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param { if (straight_ctx->kernel_rules_cnt > 1) { - CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)); + + if (CU_rc == -1) return -1; + } - if (CL_rc == -1) return -1; + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } } } } @@ -135,7 +156,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (exec_msec > 2000) { - event_log_error (hashcat_ctx, "OpenCL kernel minimum runtime larger than default TDR"); + event_log_error (hashcat_ctx, "Kernel minimum runtime larger than default TDR"); return -1; } @@ -215,6 +236,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops); +printf ("%f\n", exec_msec_pre_final); + const u32 exec_left = (const u32) (target_msec / exec_msec_pre_final); const u32 accel_left = kernel_accel_max / kernel_accel; @@ -228,25 +251,51 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param kernel_accel *= exec_accel_min; } - // reset them fake words + if (device_param->is_cuda == true) + { + // reset them fake words + + CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 0, device_param->size_pws); - CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws); + if (CL_rc == -1) return -1; - if (CL_rc == -1) return -1; + // reset other buffers in case autotune cracked something - // reset other buffers in case autotune cracked something + CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, 0, device_param->size_plains); - CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains); + if (CL_rc == -1) return -1; - if (CL_rc == -1) return -1; + CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, 0, device_param->size_shown); - CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown); + if (CL_rc == -1) return -1; - if (CL_rc == -1) return -1; + CL_rc = run_cuda_kernel_memset (hashcat_ctx, device_param, device_param->cuda_d_result, 0, device_param->size_results); - CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results); + if (CL_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + // reset them fake words - if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 0, device_param->size_pws); + + if (CL_rc == -1) return -1; + + // reset other buffers in case autotune cracked something + + CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, 0, device_param->size_plains); + + if (CL_rc == -1) return -1; + + CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, 0, device_param->size_shown); + + if (CL_rc == -1) return -1; + + CL_rc = run_opencl_kernel_memset (hashcat_ctx, device_param, device_param->opencl_d_result, 0, device_param->size_results); + + if (CL_rc == -1) return -1; + } // reset timer @@ -293,6 +342,13 @@ HC_API_CALL void *thread_autotune (void *p) if (device_param->skipped_warning == true) return NULL; + if (device_param->is_cuda == true) + { + const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context); + + if (rc_cuCtxSetCurrent == -1) return NULL; + } + const int rc_autotune = autotune (hashcat_ctx, device_param); if (rc_autotune == -1) diff --git a/src/backend.c b/src/backend.c index 434abb364..7759c41fa 100644 --- a/src/backend.c +++ b/src/backend.c @@ -2775,16 +2775,199 @@ void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 p hcfree (tmp_pws_idx); } +int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num) +{ + u64 num_elements = num; + + device_param->kernel_params_atinit[0] = (void *) &buf; + device_param->kernel_params_atinit_buf64[1] = num_elements; + + const u64 kernel_threads = device_param->kernel_wgs_atinit; + + num_elements = CEILDIV (num_elements, kernel_threads); + + CUfunction function = device_param->cuda_function_atinit; + + const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_atinit, NULL); + + if (rc_cuLaunchKernel == -1) return -1; + + const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx); + + if (rc_cuCtxSynchronize == -1) return -1; + + return 0; +} + +int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size) +{ + const u64 num16d = size / 16; + const u64 num16m = size % 16; + + if (num16d) + { + device_param->kernel_params_memset[0] = (void *) &buf; + device_param->kernel_params_memset_buf32[1] = value; + device_param->kernel_params_memset_buf64[2] = num16d; + + const u64 kernel_threads = device_param->kernel_wgs_memset; + + u64 num_elements = num16d; + + num_elements = CEILDIV (num_elements, kernel_threads); + + CUfunction function = device_param->cuda_function_memset; + + //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (CU_rc == -1) return -1; + //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CU_rc == -1) return -1; + //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CU_rc == -1) return -1; + + //const size_t global_work_size[3] = { num_elements, 1, 1 }; + //const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL); + + if (rc_cuLaunchKernel == -1) return -1; + + const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx); + + if (rc_cuCtxSynchronize == -1) return -1; + } + + if (num16m) + { + u32 tmp[4]; + + tmp[0] = value; + tmp[1] = value; + tmp[2] = value; + tmp[3] = value; + + // Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/ + + const int rc_cuMemcpyHtoD = hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m); + + if (rc_cuMemcpyHtoD == -1) return -1; + } + + return 0; +} + +int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size) +{ + return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size); +} + +int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num) +{ + u64 num_elements = num; + + device_param->kernel_params_atinit_buf64[1] = num_elements; + + const u64 kernel_threads = device_param->kernel_wgs_atinit; + + num_elements = round_up_multiple_64 (num_elements, kernel_threads); + + cl_kernel kernel = device_param->opencl_kernel_atinit; + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + int CL_rc; + + CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); + + if (CL_rc == -1) return -1; + + return 0; +} + +int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size) +{ + const u64 num16d = size / 16; + const u64 num16m = size % 16; + + if (num16d) + { + device_param->kernel_params_memset_buf32[1] = value; + device_param->kernel_params_memset_buf64[2] = num16d; + + const u64 kernel_threads = device_param->kernel_wgs_memset; + + u64 num_elements = num16d; + + num_elements = round_up_multiple_64 (num_elements, kernel_threads); + + cl_kernel kernel = device_param->opencl_kernel_memset; + + int CL_rc; + + CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (CL_rc == -1) return -1; + CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1; + CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1; + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue); + + if (CL_rc == -1) return -1; + + CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); + + if (CL_rc == -1) return -1; + } + + if (num16m) + { + u32 tmp[4]; + + tmp[0] = value; + tmp[1] = value; + tmp[2] = value; + tmp[3] = value; + + int CL_rc; + + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } + + return 0; +} + +int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size) +{ + return run_opencl_kernel_memset (hashcat_ctx, device_param, buf, 0, size); +} + int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num, const u32 event_update, const u32 iteration) { const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; const status_ctx_t *status_ctx = hashcat_ctx->status_ctx; const user_options_t *user_options = hashcat_ctx->user_options; - u64 num_elements = num; - - device_param->kernel_params_buf64[34] = num; - u64 kernel_threads = 0; switch (kern_run) @@ -2805,20 +2988,9 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con kernel_threads = MIN (kernel_threads, device_param->kernel_threads); - // kernel_threads = power_of_two_floor_32 (kernel_threads); - - if (device_param->is_cuda == true) - { - num_elements = CEILDIV (num_elements, kernel_threads); - } - - if (device_param->is_opencl == true) - { - num_elements = round_up_multiple_64 (num_elements, kernel_threads); - } + device_param->kernel_params_buf64[34] = num; - int CL_rc; - int CU_rc; + u64 num_elements = num; if (device_param->is_cuda == true) { @@ -2843,17 +3015,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } - CUevent cuda_event; + num_elements = CEILDIV (num_elements, kernel_threads); -/* if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF)) { - const size_t global_work_size[3] = { num_elements, 32, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream); - CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event); + if (rc_cuEventRecord1 == -1) return -1; - if (CL_rc == -1) return -1; + const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL); + + if (rc_cuLaunchKernel == -1) return -1; + + const int rc_cuEventRecord2 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream); + + if (rc_cuEventRecord2 == -1) return -1; } else { @@ -2879,108 +3055,40 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } - num_elements = round_up_multiple_64 (num_elements, kernel_threads); + num_elements = CEILDIV (num_elements, kernel_threads); - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + const int rc_cuEventRecord1 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream); - CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event); + if (rc_cuEventRecord1 == -1) return -1; - if (CL_rc == -1) return -1; - } + const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params, NULL); - CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue); + if (rc_cuLaunchKernel == -1) return -1; - if (CL_rc == -1) return -1; + const int rc_cuEventRecord2 = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream); - // spin damper section - - const u32 iterationm = iteration % EXPECTED_ITERATIONS; - - cl_int opencl_event_status; - - size_t param_value_size_ret; - - CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, ¶m_value_size_ret); - - if (CL_rc == -1) return -1; - - if (device_param->spin_damp > 0) - { - double spin_total = device_param->spin_damp; - - while (opencl_event_status != CL_COMPLETE) - { - if (status_ctx->devices_status == STATUS_RUNNING) - { - switch (kern_run) - { - case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break; - case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break; - } - } - else - { - // we were told to be nice - - sleep (0); - } - - CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, ¶m_value_size_ret); - - if (CL_rc == -1) return -1; - - spin_total += device_param->spin_damp; - - if (spin_total > 1) break; - } + if (rc_cuEventRecord2 == -1) return -1; } - CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event); + const int rc_cuEventSynchronize = hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event2); - if (CL_rc == -1) return -1; + if (rc_cuEventSynchronize == -1) return -1; - cl_ulong time_start; - cl_ulong time_end; + const int rc_cuStreamSynchronize = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream); - CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1; - CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); if (CL_rc == -1) return -1; + if (rc_cuStreamSynchronize == -1) return -1; - const double exec_us = (double) (time_end - time_start) / 1000; + float exec_ms; - if (device_param->spin_damp > 0) - { - if (status_ctx->devices_status == STATUS_RUNNING) - { - switch (kern_run) - { - case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break; - case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break; - case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break; - case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break; - case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break; - case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break; - case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break; - case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break; - case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break; - case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break; - } - } - } + const int rc_cuEventElapsedTime = hc_cuEventElapsedTime (hashcat_ctx, &exec_ms, device_param->cuda_event1, device_param->cuda_event2); + + if (rc_cuEventElapsedTime == -1) return -1; if (event_update) { u32 exec_pos = device_param->exec_pos; - device_param->exec_msec[exec_pos] = exec_us / 1000; + device_param->exec_msec[exec_pos] = exec_ms; exec_pos++; @@ -2991,19 +3099,12 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con device_param->exec_pos = exec_pos; } - - CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); - - if (CL_rc == -1) return -1; -*/ } if (device_param->is_opencl == true) { + int CL_rc; + cl_kernel opencl_kernel = NULL; if (device_param->is_opencl == true) @@ -3046,6 +3147,8 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con if (CL_rc == -1) return -1; } + num_elements = round_up_multiple_64 (num_elements, kernel_threads); + cl_event opencl_event; if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF)) @@ -3208,6 +3311,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num) { +puts ("run_kernel_mp"); u64 num_elements = num; switch (kern_run) @@ -3289,6 +3393,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) { +puts ("run_kernel_tm"); const u64 num_elements = 1024; // fixed const u64 kernel_threads = MIN (num_elements, device_param->kernel_wgs_tm); @@ -3317,6 +3422,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num) { +puts ("run_kernel_amp"); u64 num_elements = num; device_param->kernel_params_amp_buf64[6] = num_elements; @@ -3353,6 +3459,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num) { +puts ("run_kernel_decompress"); u64 num_elements = num; device_param->kernel_params_decompress_buf64[3] = num_elements; @@ -3387,171 +3494,9 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device return 0; } -int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num) -{ - u64 num_elements = num; - - device_param->kernel_params_atinit_buf64[1] = num_elements; - - const u64 kernel_threads = device_param->kernel_wgs_atinit; - - num_elements = round_up_multiple_64 (num_elements, kernel_threads); - - cl_kernel kernel = device_param->opencl_kernel_atinit; - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - int CL_rc; - - CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); - - if (CL_rc == -1) return -1; - - return 0; -} - -int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size) -{ - const u64 num16d = size / 16; - const u64 num16m = size % 16; - - if (num16d) - { - device_param->kernel_params_memset[0] = (void *) &buf; - device_param->kernel_params_memset_buf32[1] = value; - device_param->kernel_params_memset_buf64[2] = num16d; - - const u64 kernel_threads = device_param->kernel_wgs_memset; - - u64 num_elements = num16d; - - num_elements = CEILDIV (num_elements, kernel_threads); - - CUfunction function = device_param->cuda_function_memset; - - //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (CU_rc == -1) return -1; - //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CU_rc == -1) return -1; - //CU_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CU_rc == -1) return -1; - - //const size_t global_work_size[3] = { num_elements, 1, 1 }; - //const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - const int rc_cuLaunchKernel = hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, NULL, device_param->kernel_params_memset, NULL); - - if (rc_cuLaunchKernel == -1) return -1; - - const int rc_cuCtxSynchronize = hc_cuCtxSynchronize (hashcat_ctx); - - if (rc_cuCtxSynchronize == -1) return -1; - } - - if (num16m) - { - u32 tmp[4]; - - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; - - // Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/ - - const int rc_cuMemcpyHtoD = hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m); - - if (rc_cuMemcpyHtoD == -1) return -1; - } - - return 0; -} - -int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size) -{ - return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size); -} - -int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size) -{ - const u64 num16d = size / 16; - const u64 num16m = size % 16; - - if (num16d) - { - device_param->kernel_params_memset_buf32[1] = value; - device_param->kernel_params_memset_buf64[2] = num16d; - - const u64 kernel_threads = device_param->kernel_wgs_memset; - - u64 num_elements = num16d; - - num_elements = round_up_multiple_64 (num_elements, kernel_threads); - - cl_kernel kernel = device_param->opencl_kernel_memset; - - int CL_rc; - - CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (CL_rc == -1) return -1; - CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1; - CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1; - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue); - - if (CL_rc == -1) return -1; - - CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue); - - if (CL_rc == -1) return -1; - } - - if (num16m) - { - u32 tmp[4]; - - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; - - int CL_rc; - - CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL); - - if (CL_rc == -1) return -1; - } - - return 0; -} - -int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size) -{ - return run_opencl_kernel_memset (hashcat_ctx, device_param, buf, 0, size); -} - int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt) { +puts ("run_copy"); combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; hashconfig_t *hashconfig = hashcat_ctx->hashconfig; user_options_t *user_options = hashcat_ctx->user_options; @@ -3755,6 +3700,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt) { +puts ("run_cracker"); combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx; hashconfig_t *hashconfig = hashcat_ctx->hashconfig; hashes_t *hashes = hashcat_ctx->hashes; @@ -6799,6 +6745,32 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; } + /** + * create stream for CUDA devices + */ + + if (device_param->is_cuda == true) + { + const int rc_cuStreamCreate = hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT); + + if (rc_cuStreamCreate == -1) return -1; + } + + /** + * create events for CUDA devices + */ + + if (device_param->is_cuda == true) + { + const int rc_cuEventCreate1 = hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event1, CU_EVENT_DEFAULT); + + if (rc_cuEventCreate1 == -1) return -1; + + const int rc_cuEventCreate2 = hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_DEFAULT); + + if (rc_cuEventCreate2 == -1) return -1; + } + /** * create input buffers on device : calculate size of fixed memory buffers */ @@ -10199,6 +10171,121 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) hcfree (device_param->brain_link_out_buf); #endif + if (device_param->is_cuda == true) + { + if (device_param->cuda_d_pws_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_buf); + if (device_param->cuda_d_pws_amp_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_amp_buf); + if (device_param->cuda_d_pws_comp_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_comp_buf); + if (device_param->cuda_d_pws_idx) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_idx); + if (device_param->cuda_d_rules) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules); + if (device_param->cuda_d_rules_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c); + if (device_param->cuda_d_combs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs); + if (device_param->cuda_d_combs_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs_c); + if (device_param->cuda_d_bfs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs); + if (device_param->cuda_d_bfs_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c); + if (device_param->cuda_d_bitmap_s1_a) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_a); + if (device_param->cuda_d_bitmap_s1_b) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_b); + if (device_param->cuda_d_bitmap_s1_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_c); + if (device_param->cuda_d_bitmap_s1_d) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_d); + if (device_param->cuda_d_bitmap_s2_a) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_a); + if (device_param->cuda_d_bitmap_s2_b) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_b); + if (device_param->cuda_d_bitmap_s2_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_c); + if (device_param->cuda_d_bitmap_s2_d) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s2_d); + if (device_param->cuda_d_plain_bufs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_plain_bufs); + if (device_param->cuda_d_digests_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_digests_buf); + if (device_param->cuda_d_digests_shown) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_digests_shown); + if (device_param->cuda_d_salt_bufs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_salt_bufs); + if (device_param->cuda_d_esalt_bufs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_esalt_bufs); + if (device_param->cuda_d_tmps) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tmps); + if (device_param->cuda_d_hooks) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_hooks); + if (device_param->cuda_d_result) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_result); + if (device_param->cuda_d_extra0_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra0_buf); + if (device_param->cuda_d_extra1_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra1_buf); + if (device_param->cuda_d_extra2_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra2_buf); + if (device_param->cuda_d_extra3_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra3_buf); + if (device_param->cuda_d_root_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_root_css_buf); + if (device_param->cuda_d_markov_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_markov_css_buf); + if (device_param->cuda_d_tm_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c); + if (device_param->cuda_d_st_digests_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_digests_buf); + if (device_param->cuda_d_st_salts_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_salts_buf); + if (device_param->cuda_d_st_esalts_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_esalts_buf); + + if (device_param->cuda_event1) hc_cuEventDestroy (hashcat_ctx, device_param->cuda_event1); + if (device_param->cuda_event2) hc_cuEventDestroy (hashcat_ctx, device_param->cuda_event2); + + if (device_param->cuda_stream) hc_cuStreamDestroy (hashcat_ctx, device_param->cuda_stream); + + if (device_param->cuda_module) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module); + if (device_param->cuda_module_mp) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_mp); + if (device_param->cuda_module_amp) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp); + + if (device_param->cuda_context) hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context); + + device_param->cuda_d_pws_buf = 0; + device_param->cuda_d_pws_amp_buf = 0; + device_param->cuda_d_pws_comp_buf = 0; + device_param->cuda_d_pws_idx = 0; + device_param->cuda_d_rules = 0; + device_param->cuda_d_rules_c = 0; + device_param->cuda_d_combs = 0; + device_param->cuda_d_combs_c = 0; + device_param->cuda_d_bfs = 0; + device_param->cuda_d_bfs_c = 0; + device_param->cuda_d_bitmap_s1_a = 0; + device_param->cuda_d_bitmap_s1_b = 0; + device_param->cuda_d_bitmap_s1_c = 0; + device_param->cuda_d_bitmap_s1_d = 0; + device_param->cuda_d_bitmap_s2_a = 0; + device_param->cuda_d_bitmap_s2_b = 0; + device_param->cuda_d_bitmap_s2_c = 0; + device_param->cuda_d_bitmap_s2_d = 0; + device_param->cuda_d_plain_bufs = 0; + device_param->cuda_d_digests_buf = 0; + device_param->cuda_d_digests_shown = 0; + device_param->cuda_d_salt_bufs = 0; + device_param->cuda_d_esalt_bufs = 0; + device_param->cuda_d_tmps = 0; + device_param->cuda_d_hooks = 0; + device_param->cuda_d_result = 0; + device_param->cuda_d_extra0_buf = 0; + device_param->cuda_d_extra1_buf = 0; + device_param->cuda_d_extra2_buf = 0; + device_param->cuda_d_extra3_buf = 0; + device_param->cuda_d_root_css_buf = 0; + device_param->cuda_d_markov_css_buf = 0; + device_param->cuda_d_tm_c = 0; + device_param->cuda_d_st_digests_buf = 0; + device_param->cuda_d_st_salts_buf = 0; + device_param->cuda_d_st_esalts_buf = 0; + + device_param->cuda_function1 = NULL; + device_param->cuda_function12 = NULL; + device_param->cuda_function2 = NULL; + device_param->cuda_function23 = NULL; + device_param->cuda_function3 = NULL; + device_param->cuda_function4 = NULL; + device_param->cuda_function_init2 = NULL; + device_param->cuda_function_loop2 = NULL; + device_param->cuda_function_mp = NULL; + device_param->cuda_function_mp_l = NULL; + device_param->cuda_function_mp_r = NULL; + device_param->cuda_function_tm = NULL; + device_param->cuda_function_amp = NULL; + device_param->cuda_function_memset = NULL; + device_param->cuda_function_atinit = NULL; + device_param->cuda_function_decompress = NULL; + device_param->cuda_function_aux1 = NULL; + device_param->cuda_function_aux2 = NULL; + device_param->cuda_function_aux3 = NULL; + device_param->cuda_function_aux4 = NULL; + + device_param->cuda_module = NULL; + device_param->cuda_module_mp = NULL; + device_param->cuda_module_amp = NULL; + + device_param->cuda_context = NULL; + } + if (device_param->is_opencl == true) { if (device_param->opencl_d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf); @@ -10266,6 +10353,68 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) if (device_param->opencl_command_queue) hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue); if (device_param->opencl_context) hc_clReleaseContext (hashcat_ctx, device_param->opencl_context); + + device_param->opencl_d_pws_buf = NULL; + device_param->opencl_d_pws_amp_buf = NULL; + device_param->opencl_d_pws_comp_buf = NULL; + device_param->opencl_d_pws_idx = NULL; + device_param->opencl_d_rules = NULL; + device_param->opencl_d_rules_c = NULL; + device_param->opencl_d_combs = NULL; + device_param->opencl_d_combs_c = NULL; + device_param->opencl_d_bfs = NULL; + device_param->opencl_d_bfs_c = NULL; + device_param->opencl_d_bitmap_s1_a = NULL; + device_param->opencl_d_bitmap_s1_b = NULL; + device_param->opencl_d_bitmap_s1_c = NULL; + device_param->opencl_d_bitmap_s1_d = NULL; + device_param->opencl_d_bitmap_s2_a = NULL; + device_param->opencl_d_bitmap_s2_b = NULL; + device_param->opencl_d_bitmap_s2_c = NULL; + device_param->opencl_d_bitmap_s2_d = NULL; + device_param->opencl_d_plain_bufs = NULL; + device_param->opencl_d_digests_buf = NULL; + device_param->opencl_d_digests_shown = NULL; + device_param->opencl_d_salt_bufs = NULL; + device_param->opencl_d_esalt_bufs = NULL; + device_param->opencl_d_tmps = NULL; + device_param->opencl_d_hooks = NULL; + device_param->opencl_d_result = NULL; + device_param->opencl_d_extra0_buf = NULL; + device_param->opencl_d_extra1_buf = NULL; + device_param->opencl_d_extra2_buf = NULL; + device_param->opencl_d_extra3_buf = NULL; + device_param->opencl_d_root_css_buf = NULL; + device_param->opencl_d_markov_css_buf = NULL; + device_param->opencl_d_tm_c = NULL; + device_param->opencl_d_st_digests_buf = NULL; + device_param->opencl_d_st_salts_buf = NULL; + device_param->opencl_d_st_esalts_buf = NULL; + device_param->opencl_kernel1 = NULL; + device_param->opencl_kernel12 = NULL; + device_param->opencl_kernel2 = NULL; + device_param->opencl_kernel23 = NULL; + device_param->opencl_kernel3 = NULL; + device_param->opencl_kernel4 = NULL; + device_param->opencl_kernel_init2 = NULL; + device_param->opencl_kernel_loop2 = NULL; + device_param->opencl_kernel_mp = NULL; + device_param->opencl_kernel_mp_l = NULL; + device_param->opencl_kernel_mp_r = NULL; + device_param->opencl_kernel_tm = NULL; + device_param->opencl_kernel_amp = NULL; + device_param->opencl_kernel_memset = NULL; + device_param->opencl_kernel_atinit = NULL; + device_param->opencl_kernel_decompress = NULL; + device_param->opencl_kernel_aux1 = NULL; + device_param->opencl_kernel_aux2 = NULL; + device_param->opencl_kernel_aux3 = NULL; + device_param->opencl_kernel_aux4 = NULL; + device_param->opencl_program = NULL; + device_param->opencl_program_mp = NULL; + device_param->opencl_program_amp = NULL; + device_param->opencl_command_queue = NULL; + device_param->opencl_context = NULL; } device_param->pws_comp = NULL; @@ -10279,68 +10428,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->brain_link_in_buf = NULL; device_param->brain_link_out_buf = NULL; #endif - - device_param->opencl_d_pws_buf = NULL; - device_param->opencl_d_pws_amp_buf = NULL; - device_param->opencl_d_pws_comp_buf = NULL; - device_param->opencl_d_pws_idx = NULL; - device_param->opencl_d_rules = NULL; - device_param->opencl_d_rules_c = NULL; - device_param->opencl_d_combs = NULL; - device_param->opencl_d_combs_c = NULL; - device_param->opencl_d_bfs = NULL; - device_param->opencl_d_bfs_c = NULL; - device_param->opencl_d_bitmap_s1_a = NULL; - device_param->opencl_d_bitmap_s1_b = NULL; - device_param->opencl_d_bitmap_s1_c = NULL; - device_param->opencl_d_bitmap_s1_d = NULL; - device_param->opencl_d_bitmap_s2_a = NULL; - device_param->opencl_d_bitmap_s2_b = NULL; - device_param->opencl_d_bitmap_s2_c = NULL; - device_param->opencl_d_bitmap_s2_d = NULL; - device_param->opencl_d_plain_bufs = NULL; - device_param->opencl_d_digests_buf = NULL; - device_param->opencl_d_digests_shown = NULL; - device_param->opencl_d_salt_bufs = NULL; - device_param->opencl_d_esalt_bufs = NULL; - device_param->opencl_d_tmps = NULL; - device_param->opencl_d_hooks = NULL; - device_param->opencl_d_result = NULL; - device_param->opencl_d_extra0_buf = NULL; - device_param->opencl_d_extra1_buf = NULL; - device_param->opencl_d_extra2_buf = NULL; - device_param->opencl_d_extra3_buf = NULL; - device_param->opencl_d_root_css_buf = NULL; - device_param->opencl_d_markov_css_buf = NULL; - device_param->opencl_d_tm_c = NULL; - device_param->opencl_d_st_digests_buf = NULL; - device_param->opencl_d_st_salts_buf = NULL; - device_param->opencl_d_st_esalts_buf = NULL; - device_param->opencl_kernel1 = NULL; - device_param->opencl_kernel12 = NULL; - device_param->opencl_kernel2 = NULL; - device_param->opencl_kernel23 = NULL; - device_param->opencl_kernel3 = NULL; - device_param->opencl_kernel4 = NULL; - device_param->opencl_kernel_init2 = NULL; - device_param->opencl_kernel_loop2 = NULL; - device_param->opencl_kernel_mp = NULL; - device_param->opencl_kernel_mp_l = NULL; - device_param->opencl_kernel_mp_r = NULL; - device_param->opencl_kernel_tm = NULL; - device_param->opencl_kernel_amp = NULL; - device_param->opencl_kernel_memset = NULL; - device_param->opencl_kernel_atinit = NULL; - device_param->opencl_kernel_decompress = NULL; - device_param->opencl_kernel_aux1 = NULL; - device_param->opencl_kernel_aux2 = NULL; - device_param->opencl_kernel_aux3 = NULL; - device_param->opencl_kernel_aux4 = NULL; - device_param->opencl_program = NULL; - device_param->opencl_program_mp = NULL; - device_param->opencl_program_amp = NULL; - device_param->opencl_command_queue = NULL; - device_param->opencl_context = NULL; } } diff --git a/src/selftest.c b/src/selftest.c index 81d9b415e..144b71611 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -21,17 +21,26 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param user_options_t *user_options = hashcat_ctx->user_options; user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra; - cl_int CL_err; - + int CU_rc; int CL_rc; if (hashconfig->st_hash == NULL) return 0; // init : replace hashes with selftest hash - device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf; - device_param->kernel_params[17] = &device_param->opencl_d_st_salts_buf; - device_param->kernel_params[18] = &device_param->opencl_d_st_esalts_buf; + if (device_param->is_cuda == true) + { + device_param->kernel_params[15] = &device_param->cuda_d_st_digests_buf; + device_param->kernel_params[17] = &device_param->cuda_d_st_salts_buf; + device_param->kernel_params[18] = &device_param->cuda_d_st_esalts_buf; + } + + if (device_param->is_opencl == true) + { + device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf; + device_param->kernel_params[17] = &device_param->opencl_d_st_salts_buf; + device_param->kernel_params[18] = &device_param->opencl_d_st_esalts_buf; + } device_param->kernel_params_buf32[31] = 1; device_param->kernel_params_buf32[32] = 0; @@ -57,9 +66,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param pw.pw_len = (u32) pw_len; - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } - if (CL_err != CL_SUCCESS) return -1; + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } } else { @@ -84,9 +103,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param uppercase ((u8 *) pw_ptr, pw.pw_len); } - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; + } } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { @@ -136,13 +165,27 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param comb_ptr[comb.pw_len] = 0x80; } - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, &comb, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; + } } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { @@ -165,9 +208,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param pw.pw_len = (u32) pw_len; - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; + } } else { @@ -208,9 +261,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param bf.i = byte_swap_32 (bf.i); } - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t)); - if (CL_err != CL_SUCCESS) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } pw_t pw; memset (&pw, 0, sizeof (pw)); @@ -296,9 +359,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]); } - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } - if (CL_err != CL_SUCCESS) return -1; + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } highest_pw_len = pw.pw_len; } @@ -316,9 +389,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param pw.pw_len = (u32) pw_len; - CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)); + + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; + } } } @@ -372,15 +455,35 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (CL_rc == -1) return -1; - CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } module_ctx->module_hook12 (device_param, hashes->st_hook_salts_buf, 0, 1); - CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } } const u32 salt_pos = 0; @@ -411,15 +514,35 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (CL_rc == -1) return -1; - CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } module_ctx->module_hook23 (device_param, hashes->st_hook_salts_buf, 0, 1); - CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_rc == -1) return -1; + } } if (hashconfig->opts_type & OPTS_TYPE_INIT2) @@ -492,9 +615,19 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param u32 num_cracked; - CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); + if (device_param->is_cuda == true) + { + CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32)); + + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); - if (CL_err != CL_SUCCESS) return -1; + if (CL_rc == -1) return -1; + } // finish : cleanup and restore @@ -507,42 +640,99 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_params_buf32[33] = 0; device_param->kernel_params_buf64[34] = 0; - device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; - device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; - device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; + if (device_param->is_cuda == true) + { + device_param->kernel_params[15] = &device_param->cuda_d_digests_buf; + device_param->kernel_params[17] = &device_param->cuda_d_salt_bufs; + device_param->kernel_params[18] = &device_param->cuda_d_esalt_bufs; + + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, device_param->size_pws); if (CU_rc == -1) return -1; + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps, device_param->size_tmps); if (CU_rc == -1) return -1; + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks, device_param->size_hooks); if (CU_rc == -1) return -1; + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains); if (CU_rc == -1) return -1; + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown); if (CU_rc == -1) return -1; + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results); if (CU_rc == -1) return -1; + } - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1; - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1; - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1; - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1; - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown); if (CL_rc == -1) return -1; - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, device_param->size_results); if (CL_rc == -1) return -1; + if (device_param->is_opencl == true) + { + device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; + device_param->kernel_params[17] = &device_param->opencl_d_salt_bufs; + device_param->kernel_params[18] = &device_param->opencl_d_esalt_bufs; + + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown); if (CL_rc == -1) return -1; + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, device_param->size_results); if (CL_rc == -1) return -1; + } if (user_options->slow_candidates == true) { - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c); + if (device_param->is_cuda == true) + { + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c); + + if (CL_rc == -1) return -1; + } } else { if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) { - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c); + if (device_param->is_cuda == true) + { + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c); + + if (CL_rc == -1) return -1; + } } else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) { - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs); + if (device_param->is_cuda == true) + { + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, device_param->size_combs); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_combs_c, device_param->size_combs); + + if (CL_rc == -1) return -1; + } } else if (user_options_extra->attack_kern == ATTACK_KERN_BF) { - CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs); + if (device_param->is_cuda == true) + { + CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, device_param->size_bfs); - if (CL_rc == -1) return -1; + if (CU_rc == -1) return -1; + } + + if (device_param->is_opencl == true) + { + CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs); + + if (CL_rc == -1) return -1; + } } } @@ -586,6 +776,13 @@ HC_API_CALL void *thread_selftest (void *p) if (device_param->skipped_warning == true) return NULL; + if (device_param->is_cuda == true) + { + const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context); + + if (rc_cuCtxSetCurrent == -1) return NULL; + } + const int rc_selftest = selftest (hashcat_ctx, device_param); if (user_options->benchmark == true)