|
|
|
@ -4261,68 +4261,73 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|
|
|
|
|
|
|
|
|
if (CL_rc == -1) return -1;
|
|
|
|
|
|
|
|
|
|
// OK, so the problem here is the following:
|
|
|
|
|
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
|
|
|
|
|
// but there's no way to ask for available memory on the device.
|
|
|
|
|
// In combination, most OpenCL runtimes implementation of clCreateBuffer()
|
|
|
|
|
// are doing so called lazy memory allocation on the device.
|
|
|
|
|
// Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
|
|
|
|
|
// running on the host we end up with an error type of this:
|
|
|
|
|
// clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
|
|
|
|
|
// The clEnqueueNDRangeKernel() is because of the lazy allocation
|
|
|
|
|
// The best way to workaround this problem is if we would be able to ask for available memory,
|
|
|
|
|
// The idea here is to try to evaluate available memory by allocating it till it errors
|
|
|
|
|
// device_available_mem
|
|
|
|
|
|
|
|
|
|
#define MAX_ALLOC_CHECKS_CNT 8192
|
|
|
|
|
#define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
|
|
|
|
|
|
|
|
|
|
cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
|
|
|
|
|
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
|
|
|
|
|
|
|
|
|
|
char *tmp_host = (char *) hcmalloc (MAX_ALLOC_CHECKS_SIZE);
|
|
|
|
|
|
|
|
|
|
u64 c;
|
|
|
|
|
|
|
|
|
|
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
|
|
|
|
|
if (device_param->platform_vendor_id == VENDOR_ID_NV)
|
|
|
|
|
{
|
|
|
|
|
if (((c + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
|
|
|
|
|
// OK, so the problem here is the following:
|
|
|
|
|
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
|
|
|
|
|
// but there's no way to ask for available memory on the device.
|
|
|
|
|
// In combination, most OpenCL runtimes implementation of clCreateBuffer()
|
|
|
|
|
// are doing so called lazy memory allocation on the device.
|
|
|
|
|
// Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
|
|
|
|
|
// running on the host we end up with an error type of this:
|
|
|
|
|
// clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
|
|
|
|
|
// The clEnqueueNDRangeKernel() is because of the lazy allocation
|
|
|
|
|
// The best way to workaround this problem is if we would be able to ask for available memory,
|
|
|
|
|
// The idea here is to try to evaluate available memory by allocating it till it errors
|
|
|
|
|
|
|
|
|
|
cl_int CL_err;
|
|
|
|
|
cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
|
|
|
|
|
|
|
|
|
|
OCL_PTR *ocl = opencl_ctx->ocl;
|
|
|
|
|
char *tmp_host = (char *) hcmalloc (MAX_ALLOC_CHECKS_SIZE);
|
|
|
|
|
|
|
|
|
|
tmp_device[c] = ocl->clCreateBuffer (device_param->context, CL_MEM_READ_ONLY, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
|
|
|
|
|
u64 c;
|
|
|
|
|
|
|
|
|
|
if (CL_err != CL_SUCCESS)
|
|
|
|
|
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
|
|
|
|
|
{
|
|
|
|
|
c--;
|
|
|
|
|
if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
cl_int CL_err;
|
|
|
|
|
|
|
|
|
|
CL_err = ocl->clEnqueueReadBuffer (device_param->command_queue, tmp_device[c], CL_TRUE, 0, MAX_ALLOC_CHECKS_SIZE, tmp_host, 0, NULL, NULL);
|
|
|
|
|
OCL_PTR *ocl = opencl_ctx->ocl;
|
|
|
|
|
|
|
|
|
|
if (CL_err != CL_SUCCESS)
|
|
|
|
|
{
|
|
|
|
|
c--;
|
|
|
|
|
tmp_device[c] = ocl->clCreateBuffer (device_param->context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
if (CL_err != CL_SUCCESS) break;
|
|
|
|
|
|
|
|
|
|
CL_err = ocl->clEnqueueReadBuffer (device_param->command_queue, tmp_device[c], CL_TRUE, 0, MAX_ALLOC_CHECKS_SIZE, tmp_host, 0, NULL, NULL);
|
|
|
|
|
|
|
|
|
|
if (CL_err != CL_SUCCESS) break;
|
|
|
|
|
|
|
|
|
|
CL_err = ocl->clEnqueueWriteBuffer (device_param->command_queue, tmp_device[c], CL_TRUE, 0, MAX_ALLOC_CHECKS_SIZE, tmp_host, 0, NULL, NULL);
|
|
|
|
|
|
|
|
|
|
if (CL_err != CL_SUCCESS) break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
u64 r;
|
|
|
|
|
if (c >= 1) c--;
|
|
|
|
|
|
|
|
|
|
for (r = 0; r < c; r++)
|
|
|
|
|
{
|
|
|
|
|
CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[r]);
|
|
|
|
|
device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
|
|
|
|
|
|
|
|
|
|
if (CL_rc == -1) return -1;
|
|
|
|
|
}
|
|
|
|
|
// clean up
|
|
|
|
|
|
|
|
|
|
u64 r;
|
|
|
|
|
|
|
|
|
|
hcfree (tmp_host);
|
|
|
|
|
for (r = 0; r < c; r++)
|
|
|
|
|
{
|
|
|
|
|
CL_rc = hc_clReleaseMemObject (hashcat_ctx, tmp_device[r]);
|
|
|
|
|
|
|
|
|
|
hcfree (tmp_device);
|
|
|
|
|
if (CL_rc == -1) return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
|
|
|
|
|
hcfree (tmp_host);
|
|
|
|
|
|
|
|
|
|
hcfree (tmp_device);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* create input buffers on device : calculate size of fixed memory buffers
|
|
|
|
|