From e67195aa11ed8038d84a86561135e93cec75ebef Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 5 Aug 2018 23:29:54 +0200 Subject: [PATCH] Try to evaluate available OpenCL device memory and use this information instead of total available OpenCL device memory for autotune --- docs/changes.txt | 6 +++++ include/types.h | 1 + src/opencl.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index a2ee7967b..506aa20f0 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,5 +1,11 @@ * changes v4.2.0 -> xxx +## +## Improvements +## + +- Try to evaluate available OpenCL device memory and use this information instead of total available OpenCL device memory for autotune + * changes v4.1.0 -> v4.2.0 ## diff --git a/include/types.h b/include/types.h index ad6783c1e..4e01486b9 100644 --- a/include/types.h +++ b/include/types.h @@ -953,6 +953,7 @@ typedef struct hc_device_param u32 device_processors; u64 device_maxmem_alloc; u64 device_global_mem; + u64 device_available_mem; u32 device_maxclock_frequency; size_t device_maxworkgroup_size; u64 device_local_mem_size; diff --git a/src/opencl.c b/src/opencl.c index 9bd9e90ea..d14556962 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -3225,6 +3225,8 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_global_mem = device_global_mem; + device_param->device_available_mem = 0; + // device_maxmem_alloc cl_ulong device_maxmem_alloc; @@ -4259,6 +4261,57 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; + // OK, so the problem here is the following: + // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, + // but there's no way to ask for available memory on the device. + // In combination, most OpenCL runtimes implementation of clCreateBuffer() + // are doing so called lazy memory allocation on the device. + // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) + // running on the host we end up with an error type of this: + // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE + // The clEnqueueNDRangeKernel() is because of the lazy allocation + // The best way to workaround this problem is if we would be able to ask for available memory, + // The idea here is to try to evaluate available memory by allocating it till it errors + + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (32 * 1024 * 1024) + + cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); + + char *tmp_host = (char *) hcmalloc (MAX_ALLOC_CHECKS_SIZE); + + int c; + + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + cl_int CL_err; + + OCL_PTR *ocl = opencl_ctx->ocl; + + tmp_device[c] = ocl->clCreateBuffer (device_param->context, CL_MEM_READ_ONLY, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); + + if (CL_err != CL_SUCCESS) break; + + CL_err = ocl->clEnqueueReadBuffer (device_param->command_queue, tmp_device[c], CL_TRUE, 0, MAX_ALLOC_CHECKS_SIZE, tmp_host, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) break; + } + + int r; + + for (r = 0; r < c; r++) + { + CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[r]); + + if (CL_rc == -1) return -1; + } + + hcfree (tmp_host); + + hcfree (tmp_device); + + device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE; + /** * create input buffers on device : calculate size of fixed memory buffers */ @@ -4436,7 +4489,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) continue; } - if ((size_scrypt + scrypt_extra_space) > device_param->device_global_mem) + if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem) { if (user_options->quiet == false) event_log_warning (hashcat_ctx, "Increasing total device memory allocatable for --scrypt-tmto %u.", tmto); @@ -6095,7 +6148,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) #define PWS_SPACE (1024 * 1024 * 1024) - // sometimes device_global_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. + // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. // let's add some extra space just to be sure. #define EXTRA_SPACE (64 * 1024 * 1024) @@ -6173,7 +6226,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) + size_st_salts + size_st_esalts; - if ((size_total + EXTRA_SPACE) > device_param->device_global_mem) memory_limit_hit = 1; + if ((size_total + EXTRA_SPACE) > device_param->device_available_mem) memory_limit_hit = 1; if (memory_limit_hit == 1) {