mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-09 15:20:59 +00:00
Try to evaluate available OpenCL device memory and use this information instead of total available OpenCL device memory for autotune
This commit is contained in:
parent
af2626e67a
commit
e67195aa11
@ -1,5 +1,11 @@
|
|||||||
* changes v4.2.0 -> xxx
|
* changes v4.2.0 -> xxx
|
||||||
|
|
||||||
|
##
|
||||||
|
## Improvements
|
||||||
|
##
|
||||||
|
|
||||||
|
- Try to evaluate available OpenCL device memory and use this information instead of total available OpenCL device memory for autotune
|
||||||
|
|
||||||
* changes v4.1.0 -> v4.2.0
|
* changes v4.1.0 -> v4.2.0
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -953,6 +953,7 @@ typedef struct hc_device_param
|
|||||||
u32 device_processors;
|
u32 device_processors;
|
||||||
u64 device_maxmem_alloc;
|
u64 device_maxmem_alloc;
|
||||||
u64 device_global_mem;
|
u64 device_global_mem;
|
||||||
|
u64 device_available_mem;
|
||||||
u32 device_maxclock_frequency;
|
u32 device_maxclock_frequency;
|
||||||
size_t device_maxworkgroup_size;
|
size_t device_maxworkgroup_size;
|
||||||
u64 device_local_mem_size;
|
u64 device_local_mem_size;
|
||||||
|
59
src/opencl.c
59
src/opencl.c
@ -3225,6 +3225,8 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->device_global_mem = device_global_mem;
|
device_param->device_global_mem = device_global_mem;
|
||||||
|
|
||||||
|
device_param->device_available_mem = 0;
|
||||||
|
|
||||||
// device_maxmem_alloc
|
// device_maxmem_alloc
|
||||||
|
|
||||||
cl_ulong device_maxmem_alloc;
|
cl_ulong device_maxmem_alloc;
|
||||||
@ -4259,6 +4261,57 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
if (CL_rc == -1) return -1;
|
if (CL_rc == -1) return -1;
|
||||||
|
|
||||||
|
// OK, so the problem here is the following:
|
||||||
|
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
|
||||||
|
// but there's no way to ask for available memory on the device.
|
||||||
|
// In combination, most OpenCL runtimes implementation of clCreateBuffer()
|
||||||
|
// are doing so called lazy memory allocation on the device.
|
||||||
|
// Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
|
||||||
|
// running on the host we end up with an error type of this:
|
||||||
|
// clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
|
||||||
|
// The clEnqueueNDRangeKernel() is because of the lazy allocation
|
||||||
|
// The best way to workaround this problem is if we would be able to ask for available memory,
|
||||||
|
// The idea here is to try to evaluate available memory by allocating it till it errors
|
||||||
|
|
||||||
|
#define MAX_ALLOC_CHECKS_CNT 8192
|
||||||
|
#define MAX_ALLOC_CHECKS_SIZE (32 * 1024 * 1024)
|
||||||
|
|
||||||
|
cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
|
||||||
|
|
||||||
|
char *tmp_host = (char *) hcmalloc (MAX_ALLOC_CHECKS_SIZE);
|
||||||
|
|
||||||
|
int c;
|
||||||
|
|
||||||
|
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
|
||||||
|
{
|
||||||
|
cl_int CL_err;
|
||||||
|
|
||||||
|
OCL_PTR *ocl = opencl_ctx->ocl;
|
||||||
|
|
||||||
|
tmp_device[c] = ocl->clCreateBuffer (device_param->context, CL_MEM_READ_ONLY, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
|
||||||
|
|
||||||
|
if (CL_err != CL_SUCCESS) break;
|
||||||
|
|
||||||
|
CL_err = ocl->clEnqueueReadBuffer (device_param->command_queue, tmp_device[c], CL_TRUE, 0, MAX_ALLOC_CHECKS_SIZE, tmp_host, 0, NULL, NULL);
|
||||||
|
|
||||||
|
if (CL_err != CL_SUCCESS) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int r;
|
||||||
|
|
||||||
|
for (r = 0; r < c; r++)
|
||||||
|
{
|
||||||
|
CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[r]);
|
||||||
|
|
||||||
|
if (CL_rc == -1) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
hcfree (tmp_host);
|
||||||
|
|
||||||
|
hcfree (tmp_device);
|
||||||
|
|
||||||
|
device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create input buffers on device : calculate size of fixed memory buffers
|
* create input buffers on device : calculate size of fixed memory buffers
|
||||||
*/
|
*/
|
||||||
@ -4436,7 +4489,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((size_scrypt + scrypt_extra_space) > device_param->device_global_mem)
|
if ((size_scrypt + scrypt_extra_space) > device_param->device_available_mem)
|
||||||
{
|
{
|
||||||
if (user_options->quiet == false) event_log_warning (hashcat_ctx, "Increasing total device memory allocatable for --scrypt-tmto %u.", tmto);
|
if (user_options->quiet == false) event_log_warning (hashcat_ctx, "Increasing total device memory allocatable for --scrypt-tmto %u.", tmto);
|
||||||
|
|
||||||
@ -6095,7 +6148,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
#define PWS_SPACE (1024 * 1024 * 1024)
|
#define PWS_SPACE (1024 * 1024 * 1024)
|
||||||
|
|
||||||
// sometimes device_global_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
|
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
|
||||||
// let's add some extra space just to be sure.
|
// let's add some extra space just to be sure.
|
||||||
|
|
||||||
#define EXTRA_SPACE (64 * 1024 * 1024)
|
#define EXTRA_SPACE (64 * 1024 * 1024)
|
||||||
@ -6173,7 +6226,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
+ size_st_salts
|
+ size_st_salts
|
||||||
+ size_st_esalts;
|
+ size_st_esalts;
|
||||||
|
|
||||||
if ((size_total + EXTRA_SPACE) > device_param->device_global_mem) memory_limit_hit = 1;
|
if ((size_total + EXTRA_SPACE) > device_param->device_available_mem) memory_limit_hit = 1;
|
||||||
|
|
||||||
if (memory_limit_hit == 1)
|
if (memory_limit_hit == 1)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user