mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-26 01:50:10 +00:00
AMD GPUs: On Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32
ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency
This commit is contained in:
parent
7f419c68af
commit
fd2cb59d26
@ -124,7 +124,9 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
|
||||
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
||||
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
||||
);
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
|
||||
// HIP doesnt support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
|
||||
#elif 0
|
||||
__asm__ __volatile__
|
||||
(
|
||||
"V_SUB_U32 %0, %9, %17;"
|
||||
@ -176,7 +178,9 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
|
||||
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
||||
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
||||
);
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
|
||||
// HIP doesnt support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
|
||||
#elif 0
|
||||
__asm__ __volatile__
|
||||
(
|
||||
"V_ADD_U32 %0, %9, %17;"
|
||||
|
@ -9366,6 +9366,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
|
||||
{
|
||||
// from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
|
||||
// crazy, but apple does not support this query!
|
||||
// the best alternative is "Preferred work group size multiple (kernel)", but requires to specify a kernel.
|
||||
// so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it it set to 8 by default.
|
||||
// we then assign the value kernel_preferred_wgs_multiple a small kernel like bzero after test if this was set to 0.
|
||||
|
||||
device_param->kernel_preferred_wgs_multiple = 0;
|
||||
}
|
||||
|
||||
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
|
||||
{
|
||||
cl_uint device_wavefront_width_amd;
|
||||
@ -12023,6 +12036,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1;
|
||||
|
||||
// apple hack, but perhaps also an alternative for other vendors
|
||||
|
||||
if (device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero;
|
||||
|
||||
// GPU autotune init
|
||||
|
||||
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) return -1;
|
||||
|
Loading…
Reference in New Issue
Block a user