1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-01-22 21:51:07 +00:00

CUDA offers a direct way to query available device memory (cuMemGetInfo), so there is no need to brute-force it

This commit is contained in:
Jens Steube 2019-05-14 10:09:46 +02:00
parent 1943c35e4a
commit bca03bb7ed

View File

@ -1517,6 +1517,32 @@ int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t
return 0;
}
// Calls the cuMemGetInfo entry point of the loaded CUDA function table and
// reports any failure through the hashcat event log.
//
// hashcat_ctx: context providing backend_ctx->cuda and used for error logging
// free, total: output pointers, forwarded unchanged to cuMemGetInfo()
//              (presumably free/total device memory in bytes, per the CUDA
//              driver API - confirm against the driver documentation)
//
// Returns 0 when the driver call yields CUDA_SUCCESS, otherwise -1 after an
// error has been logged.
int hc_cuMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total)
{
  CUDA_PTR *cuda = hashcat_ctx->backend_ctx->cuda;

  const CUresult rc = cuda->cuMemGetInfo (free, total);

  // fast path: nothing to report
  if (rc == CUDA_SUCCESS) return 0;

  // prefer the driver's textual description; if it cannot supply one,
  // log the raw result code instead
  const char *err_msg = NULL;

  if (cuda->cuGetErrorString (rc, &err_msg) != CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuMemGetInfo(): %d", rc);

    return -1;
  }

  event_log_error (hashcat_ctx, "cuMemGetInfo(): %s", err_msg);

  return -1;
}
int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc)
{
@ -5555,72 +5581,14 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
// device_available_mem
#define MAX_ALLOC_CHECKS_CNT 8192
#define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
size_t free = 0;
size_t total = 0;
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
const int rc_cuMemGetInfo = hc_cuMemGetInfo (hashcat_ctx, &free, &total);
// OK, so the problem here is the following:
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
// but there's no way to ask for available memory on the device.
// In combination, most OpenCL runtimes implementation of clCreateBuffer()
// are doing so called lazy memory allocation on the device.
// Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
// running on the host we end up with an error type of this:
// clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
// The clEnqueueNDRangeKernel() is because of the lazy allocation
// The best way to workaround this problem is if we would be able to ask for available memory,
// The idea here is to try to evaluate available memory by allocating it till it errors
if (rc_cuMemGetInfo == -1) return -1;
CUdeviceptr *tmp_device = (CUdeviceptr *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (CUdeviceptr));
u64 c;
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
{
if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
CUresult rc_tmp;
CUDA_PTR *cuda = backend_ctx->cuda;
rc_tmp = cuda->cuMemAlloc (&tmp_device[c], MAX_ALLOC_CHECKS_SIZE);
if (rc_tmp != CUDA_SUCCESS)
{
c--;
break;
}
char tmp_host[8];
rc_tmp = cuda->cuMemcpyDtoH (tmp_host, tmp_device[c], sizeof (tmp_host));
if (rc_tmp != CUDA_SUCCESS) break;
rc_tmp = cuda->cuMemcpyHtoD (tmp_device[c], tmp_host, sizeof (tmp_host));
if (rc_tmp != CUDA_SUCCESS) break;
}
device_param->device_available_mem = c * MAX_ALLOC_CHECKS_SIZE;
// clean up
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
{
if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
if (tmp_device[c] != 0)
{
const int rc_cuMemFree = hc_cuMemFree (hashcat_ctx, tmp_device[c]);
if (rc_cuMemFree == -1) return -1;
}
}
hcfree (tmp_device);
device_param->device_available_mem = (u64) free;
const int rc_cuCtxDestroy = hc_cuCtxDestroy (hashcat_ctx, cuda_context);