From 76e388ea47d1193c540b4d9889ad6f217a297511 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 18 Aug 2021 16:10:51 +0200 Subject: [PATCH] Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version --- docs/changes.txt | 2 ++ include/types.h | 1 + src/backend.c | 39 +++++++++++++++++++++++++++++---------- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 9e6862f00..979eb5c9b 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -65,6 +65,8 @@ - HIP Kernels: Dependency on hip/hip runtime.h has been removed to enable easier integration of the HIP backend under Windows - Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache - Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations +- Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory +- NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version - OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global() - RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400 - Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN diff --git a/include/types.h b/include/types.h index e180da4e8..0a9ac6f92 100644 --- a/include/types.h +++ b/include/types.h @@ -1111,6 +1111,7 @@ typedef struct hc_device_param u64 device_maxmem_alloc; u64 device_global_mem; u64 device_available_mem; + int device_host_unified_memory; u32 device_maxclock_frequency; size_t device_maxworkgroup_size; u64 device_local_mem_size; diff --git a/src/backend.c b/src/backend.c index 8c1da3e38..65336dd5e 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8618,6 +8618,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_processors = device_processors; + // device_host_unified_memory + + cl_bool device_host_unified_memory = false; + + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0; + // device_global_mem cl_ulong device_global_mem = 0; @@ -8644,6 +8656,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_maxmem_alloc = device_maxmem_alloc; + if (device_param->device_host_unified_memory == 1) + { + // so, we actually have only half the memory because we need the same buffers on host side + + device_param->device_maxmem_alloc /= 2; + } + // note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes // testwise disabling that //device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff); @@ -8914,14 +8933,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower)); - if ((strstr (opencl_device_version_lower, "neo ")) - || (strstr (opencl_device_version_lower, " neo")) - || (strstr (opencl_device_version_lower, "beignet ")) + if ((strstr (opencl_device_version_lower, "beignet ")) || (strstr (opencl_device_version_lower, " beignet")) || (strstr (opencl_device_version_lower, "mesa ")) || (strstr (opencl_device_version_lower, " mesa"))) { - // NEO: https://github.com/hashcat/hashcat/issues/2342 // BEIGNET: https://github.com/hashcat/hashcat/issues/2243 // MESA: https://github.com/hashcat/hashcat/issues/2269 @@ -9694,11 +9710,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - #if defined (_WIN) - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) - #else - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))) - #endif + if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { // OK, so the problem here is the following: // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, @@ -9771,6 +9783,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) hc_clReleaseCommandQueue (hashcat_ctx, command_queue); hc_clReleaseContext (hashcat_ctx, context); + + if (device_param->device_host_unified_memory == 1) + { + // so, we actually have only half the memory because we need the same buffers on host side + + device_param->device_available_mem /= 2; + } } } @@ -14614,7 +14633,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) u64 EXTRA_SPACE = (1024ULL * 1024ULL) * kernel_accel_max; - EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); + EXTRA_SPACE = MAX (EXTRA_SPACE, ( 256ULL * 1024ULL * 1024ULL)); EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); if ((size_pws + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;