diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index 2095cd60c..4f3ef1a22 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -66,14 +66,12 @@ #define IS_GENERIC #endif -#if DEVICE_TYPE == DEVICE_TYPE_CPU -#elif DEVICE_TYPE == DEVICE_TYPE_GPU -// AMD fails with mode 6211 -#ifdef IS_NV +#define LOCAL_MEM_TYPE_LOCAL 1 +#define LOCAL_MEM_TYPE_GLOBAL 2 + +#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL #define REAL_SHM #endif -#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL -#endif #ifdef REAL_SHM #define SHM_TYPE __local diff --git a/docs/changes.txt b/docs/changes.txt index d6a5598e7..1f0f58a71 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -64,6 +64,7 @@ - Memory: Limit maximum host memory allocation depending on bitness - Memory: Reduced default maximum bitmap size from 24 to 18 and give a notice to use --bitmap-max to restore - Pidfile: Treat a corrupted pidfile like a not existing pidfile +- OpenCL Device: Do a real query on OpenCL local memory type instead of just assuming it - Tests: Added hash-mode 11700 (Streebog-256) - Tests: Added hash-mode 11750 (HMAC-Streebog-256 (key = $pass), big-endian) - Tests: Added hash-mode 11760 (HMAC-Streebog-256 (key = $salt), big-endian) diff --git a/include/types.h b/include/types.h index 83737d5fd..f1cab5964 100644 --- a/include/types.h +++ b/include/types.h @@ -1026,6 +1026,7 @@ typedef struct hc_device_param u32 device_maxclock_frequency; size_t device_maxworkgroup_size; u64 device_local_mem_size; + cl_device_local_mem_type device_local_mem_type; u32 vector_width; diff --git a/src/opencl.c b/src/opencl.c index d51dd1fcc..85c990265 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -3593,6 +3593,16 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_local_mem_size = device_local_mem_size; + // device_local_mem_type + + cl_device_local_mem_type device_local_mem_type; + + CL_rc = hc_clGetDeviceInfo (hashcat_ctx, device_param->device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof (device_local_mem_type), &device_local_mem_type, NULL); + + if (CL_rc == -1) return -1; + + device_param->device_local_mem_type = device_local_mem_type; + // If there's both an Intel CPU and an AMD OpenCL runtime it's a tricky situation // Both platforms support CPU device types and therefore both will try to use 100% of the physical resources // This results in both utilizing it for 50% @@ -4878,9 +4888,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) char build_opts[2048] = { 0 }; #if defined (DEBUG) - snprintf (build_opts, sizeof (build_opts) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type); + snprintf (build_opts, sizeof (build_opts) - 1, "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type); #else - snprintf (build_opts, sizeof (build_opts) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type); + snprintf (build_opts, sizeof (build_opts) - 1, "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type); #endif /*