mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-22 16:18:09 +00:00
Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename. Remove some unused function symbol lookups in HIP library.
This commit is contained in:
parent
a09efb2634
commit
5024865d87
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
|
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
|
||||||
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
|
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
|
||||||
|
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
|
||||||
- Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
|
- Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
|
||||||
- Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
|
- Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
|
||||||
|
|
||||||
@ -27,6 +28,7 @@
|
|||||||
## Technical
|
## Technical
|
||||||
##
|
##
|
||||||
|
|
||||||
|
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
|
||||||
- HIP Kernels: Got rid of hip/hip_runtime.h dependency to enable easier integration of the HIP backend on Windows
|
- HIP Kernels: Got rid of hip/hip_runtime.h dependency to enable easier integration of the HIP backend on Windows
|
||||||
- SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
|
- SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
|
||||||
|
|
||||||
|
@ -1118,6 +1118,8 @@ typedef struct hc_device_param
|
|||||||
int sm_minor;
|
int sm_minor;
|
||||||
u32 kernel_exec_timeout;
|
u32 kernel_exec_timeout;
|
||||||
|
|
||||||
|
u32 kernel_preferred_wgs_multiple;
|
||||||
|
|
||||||
st_status_t st_status;
|
st_status_t st_status;
|
||||||
|
|
||||||
int vector_width;
|
int vector_width;
|
||||||
|
145
src/backend.c
145
src/backend.c
@ -2516,14 +2516,14 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig, hipCtxGetCacheConfig, HIP_HIPCTXGETCACHECONFIG, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig, hipCtxGetCacheConfig, HIP_HIPCTXGETCACHECONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent, hipCtxGetCurrent, HIP_HIPCTXGETCURRENT, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent, hipCtxGetCurrent, HIP_HIPCTXGETCURRENT, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, HIP_HIPCTXGETSHAREDMEMCONFIG, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, HIP_HIPCTXGETSHAREDMEMCONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig, hipCtxSetCacheConfig, HIP_HIPCTXSETCACHECONFIG, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig, hipCtxSetCacheConfig, HIP_HIPCTXSETCACHECONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, HIP_HIPCTXSETSHAREDMEMCONFIG, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, HIP_HIPCTXSETSHAREDMEMCONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1);
|
||||||
@ -2539,9 +2539,9 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
|
|||||||
HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
|
||||||
//HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
|
||||||
@ -2556,20 +2556,21 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
|
|||||||
HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleLoadData, hipModuleLoadData, HIP_HIPMODULELOADDATA, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleLoadData, hipModuleLoadData, HIP_HIPMODULELOADDATA, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipProfilerStart, hipProfilerStart, HIP_HIPPROFILERSTART, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipProfilerStart, hipProfilerStart, HIP_HIPPROFILERSTART, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipProfilerStop, hipProfilerStop, HIP_HIPPROFILERSTOP, HIP, 1);
|
//HC_LOAD_FUNC_HIP (hip, hipProfilerStop, hipProfilerStop, HIP_HIPPROFILERSTOP, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent, hipStreamWaitEvent, HIP_HIPSTREAMWAITEVENT, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent, hipStreamWaitEvent, HIP_HIPSTREAMWAITEVENT, HIP, 1);
|
||||||
|
//TODO HIP?
|
||||||
#if defined (WITH_CUBINX)
|
#if defined (WITH_CUBINX)
|
||||||
HC_LOAD_FUNC_HIP (hip, hipLinkCreate, hipLinkCreate, HIP_HIPLINKCREATE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipLinkCreate, hipLinkCreate, HIP_HIPLINKCREATE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipLinkAddData, hipLinkAddData, HIP_HIPLINKADDDATA, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipLinkAddData, hipLinkAddData, HIP_HIPLINKADDDATA, HIP, 1);
|
||||||
@ -8070,6 +8071,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->kernel_exec_timeout = kernel_exec_timeout;
|
device_param->kernel_exec_timeout = kernel_exec_timeout;
|
||||||
|
|
||||||
|
// warp size
|
||||||
|
|
||||||
|
int warp_size = 0;
|
||||||
|
|
||||||
|
if (hc_cuDeviceGetAttribute (hashcat_ctx, &warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, cuda_device) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
device_param->kernel_preferred_wgs_multiple = warp_size;
|
||||||
|
|
||||||
// max_shared_memory_per_block
|
// max_shared_memory_per_block
|
||||||
|
|
||||||
int max_shared_memory_per_block = 0;
|
int max_shared_memory_per_block = 0;
|
||||||
@ -8436,11 +8449,23 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->kernel_exec_timeout = kernel_exec_timeout;
|
device_param->kernel_exec_timeout = kernel_exec_timeout;
|
||||||
|
|
||||||
|
// warp size
|
||||||
|
|
||||||
|
int warp_size = 0;
|
||||||
|
|
||||||
|
if (hc_hipDeviceGetAttribute (hashcat_ctx, &warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
device_param->kernel_preferred_wgs_multiple = warp_size;
|
||||||
|
|
||||||
// max_shared_memory_per_block
|
// max_shared_memory_per_block
|
||||||
|
|
||||||
int max_shared_memory_per_block = 0;
|
int max_shared_memory_per_block = 0;
|
||||||
|
|
||||||
if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, hip_device) == -1)
|
if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, hip_device) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
continue;
|
continue;
|
||||||
@ -9053,6 +9078,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
hcfree (device_extensions);
|
hcfree (device_extensions);
|
||||||
|
|
||||||
|
// kernel_preferred_wgs_multiple
|
||||||
|
|
||||||
|
// There is no global query for this attribute on OpenCL that is not linked to a specific kernel, so we set it to a fixed value
|
||||||
|
// Later in the code, we add vendor specific extensions to query it
|
||||||
|
|
||||||
|
device_param->kernel_preferred_wgs_multiple = 8;
|
||||||
|
|
||||||
// device_local_mem_type
|
// device_local_mem_type
|
||||||
|
|
||||||
cl_device_local_mem_type device_local_mem_type;
|
cl_device_local_mem_type device_local_mem_type;
|
||||||
@ -9332,6 +9364,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
{
|
{
|
||||||
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
|
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
|
||||||
{
|
{
|
||||||
|
cl_uint device_wavefront_width_amd;
|
||||||
|
|
||||||
|
// from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt
|
||||||
|
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||||
|
|
||||||
|
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof (device_wavefront_width_amd), &device_wavefront_width_amd, NULL) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
device_param->kernel_preferred_wgs_multiple = device_wavefront_width_amd;
|
||||||
|
|
||||||
cl_device_topology_amd amdtopo;
|
cl_device_topology_amd amdtopo;
|
||||||
|
|
||||||
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1)
|
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_TOPOLOGY_AMD, sizeof (amdtopo), &amdtopo, NULL) == -1)
|
||||||
@ -9348,6 +9393,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
|
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
|
||||||
{
|
{
|
||||||
|
cl_uint device_warp_size_nv;
|
||||||
|
|
||||||
|
// from deps/OpenCL-Headers/CL/cl_ext.h
|
||||||
|
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||||
|
|
||||||
|
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_WARP_SIZE_NV, sizeof (device_warp_size_nv), &device_warp_size_nv, NULL) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
device_param->kernel_preferred_wgs_multiple = device_warp_size_nv;
|
||||||
|
|
||||||
cl_uint pci_bus_id_nv; // is cl_uint the right type for them??
|
cl_uint pci_bus_id_nv; // is cl_uint the right type for them??
|
||||||
cl_uint pci_slot_id_nv;
|
cl_uint pci_slot_id_nv;
|
||||||
|
|
||||||
@ -10324,9 +10382,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
|
|||||||
{
|
{
|
||||||
// for all CPU we just do 1 ...
|
// for all CPU we just do 1 ...
|
||||||
|
|
||||||
const u32 cpu_prefered_thread_count = 1;
|
kernel_threads_max = MIN (kernel_threads_max, 1);
|
||||||
|
|
||||||
kernel_threads_max = MIN (kernel_threads_max, cpu_prefered_thread_count);
|
|
||||||
}
|
}
|
||||||
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||||
{
|
{
|
||||||
@ -10334,28 +10390,15 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
|
|||||||
|
|
||||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
|
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
|
||||||
{
|
{
|
||||||
const u32 gpu_prefered_thread_count = 8;
|
kernel_threads_max = MIN (kernel_threads_max, 8);
|
||||||
|
|
||||||
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
|
|
||||||
}
|
}
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||||
{
|
{
|
||||||
u32 gpu_prefered_thread_count = 64;
|
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
||||||
|
|
||||||
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
|
|
||||||
{
|
|
||||||
// based on clinfo output: Preferred work group size multiple (kernel)
|
|
||||||
|
|
||||||
gpu_prefered_thread_count = 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
|
|
||||||
}
|
}
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||||
{
|
{
|
||||||
u32 gpu_prefered_thread_count = 64;
|
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
||||||
|
|
||||||
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10371,6 +10414,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
|||||||
{
|
{
|
||||||
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
|
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
|
||||||
const folder_config_t *folder_config = hashcat_ctx->folder_config;
|
const folder_config_t *folder_config = hashcat_ctx->folder_config;
|
||||||
|
const user_options_t *user_options = hashcat_ctx->user_options;
|
||||||
|
|
||||||
bool cached = true;
|
bool cached = true;
|
||||||
|
|
||||||
@ -10658,9 +10702,9 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
|||||||
//hiprtc_options[1] = "--device-as-default-execution-space";
|
//hiprtc_options[1] = "--device-as-default-execution-space";
|
||||||
//hiprtc_options[2] = "--gpu-architecture";
|
//hiprtc_options[2] = "--gpu-architecture";
|
||||||
|
|
||||||
//hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
|
hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
|
||||||
|
|
||||||
hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
//hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
||||||
hiprtc_options[1] = "-nocudainc";
|
hiprtc_options[1] = "-nocudainc";
|
||||||
hiprtc_options[2] = "-nocudalib";
|
hiprtc_options[2] = "-nocudalib";
|
||||||
hiprtc_options[3] = "";
|
hiprtc_options[3] = "";
|
||||||
@ -11408,24 +11452,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
}
|
}
|
||||||
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||||
{
|
{
|
||||||
// for GPU we need to distinguish by vendor
|
native_threads = device_param->kernel_preferred_wgs_multiple;
|
||||||
|
|
||||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
|
|
||||||
{
|
|
||||||
native_threads = 8;
|
|
||||||
}
|
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
|
||||||
{
|
|
||||||
native_threads = 64;
|
|
||||||
}
|
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
|
||||||
{
|
|
||||||
native_threads = 64;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
native_threads = 32;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -11751,7 +11778,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
|
char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
|
||||||
|
|
||||||
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s",
|
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u",
|
||||||
backend_ctx->comptime,
|
backend_ctx->comptime,
|
||||||
backend_ctx->cuda_driver_version,
|
backend_ctx->cuda_driver_version,
|
||||||
backend_ctx->hip_driver_version,
|
backend_ctx->hip_driver_version,
|
||||||
@ -11759,7 +11786,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->opencl_platform_vendor_id,
|
device_param->opencl_platform_vendor_id,
|
||||||
device_param->device_name,
|
device_param->device_name,
|
||||||
device_param->opencl_device_version,
|
device_param->opencl_device_version,
|
||||||
device_param->opencl_driver_version);
|
device_param->opencl_driver_version,
|
||||||
|
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
|
||||||
|
|
||||||
md5_ctx_t md5_ctx;
|
md5_ctx_t md5_ctx;
|
||||||
|
|
||||||
@ -12078,7 +12106,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
|
const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
|
||||||
|
|
||||||
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s",
|
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s",
|
||||||
backend_ctx->comptime,
|
backend_ctx->comptime,
|
||||||
backend_ctx->cuda_driver_version,
|
backend_ctx->cuda_driver_version,
|
||||||
backend_ctx->hip_driver_version,
|
backend_ctx->hip_driver_version,
|
||||||
@ -12090,6 +12118,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->vector_width,
|
device_param->vector_width,
|
||||||
hashconfig->kern_type,
|
hashconfig->kern_type,
|
||||||
extra_value,
|
extra_value,
|
||||||
|
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple,
|
||||||
build_options_module_buf);
|
build_options_module_buf);
|
||||||
|
|
||||||
md5_ctx_t md5_ctx;
|
md5_ctx_t md5_ctx;
|
||||||
@ -14810,14 +14839,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// we
|
|
||||||
|
|
||||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
|
||||||
{
|
|
||||||
device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
|
|
||||||
device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* now everything that depends on threads and accel, basically dynamic workload
|
* now everything that depends on threads and accel, basically dynamic workload
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user