1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-22 22:58:30 +00:00

Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory

NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version
This commit is contained in:
Jens Steube 2021-08-18 16:10:51 +02:00
parent 9254603960
commit 76e388ea47
3 changed files with 32 additions and 10 deletions

View File

@ -65,6 +65,8 @@
- HIP Kernels: Dependency on hip/hip runtime.h has been removed to enable easier integration of the HIP backend under Windows
- Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache
- Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations
- Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory
- NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version
- OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global()
- RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400
- Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN

View File

@ -1111,6 +1111,7 @@ typedef struct hc_device_param
u64 device_maxmem_alloc;
u64 device_global_mem;
u64 device_available_mem;
int device_host_unified_memory;
u32 device_maxclock_frequency;
size_t device_maxworkgroup_size;
u64 device_local_mem_size;

View File

@ -8618,6 +8618,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->device_processors = device_processors;
// device_host_unified_memory
cl_bool device_host_unified_memory = false;
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1)
{
device_param->skipped = true;
continue;
}
device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0;
// device_global_mem
cl_ulong device_global_mem = 0;
@ -8644,6 +8656,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->device_maxmem_alloc = device_maxmem_alloc;
if (device_param->device_host_unified_memory == 1)
{
// so, we actually have only half the memory because we need the same buffers on host side
device_param->device_maxmem_alloc /= 2;
}
// note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
// testwise disabling that
//device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
@ -8914,14 +8933,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower));
if ((strstr (opencl_device_version_lower, "neo "))
|| (strstr (opencl_device_version_lower, " neo"))
|| (strstr (opencl_device_version_lower, "beignet "))
if ((strstr (opencl_device_version_lower, "beignet "))
|| (strstr (opencl_device_version_lower, " beignet"))
|| (strstr (opencl_device_version_lower, "mesa "))
|| (strstr (opencl_device_version_lower, " mesa")))
{
// NEO: https://github.com/hashcat/hashcat/issues/2342
// BEIGNET: https://github.com/hashcat/hashcat/issues/2243
// MESA: https://github.com/hashcat/hashcat/issues/2269
@ -9694,11 +9710,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
#if defined (_WIN)
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
#else
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)))
#endif
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// OK, so the problem here is the following:
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
@ -9771,6 +9783,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
hc_clReleaseContext (hashcat_ctx, context);
if (device_param->device_host_unified_memory == 1)
{
// so, we actually have only half the memory because we need the same buffers on host side
device_param->device_available_mem /= 2;
}
}
}
@ -14614,7 +14633,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 256ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
if ((size_pws + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;