mirror of
https://github.com/hashcat/hashcat.git
synced 2024-12-22 22:58:30 +00:00
Backport more ROCm based optimizations to HIP
This commit is contained in:
parent
2e929e692e
commit
45e65dd05a
@ -253,7 +253,7 @@ DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n)
|
||||
{
|
||||
u32 r = 0;
|
||||
|
||||
asm ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n));
|
||||
__asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -95,8 +95,6 @@
|
||||
#define IS_GENERIC
|
||||
#elif VENDOR_ID == (1 << 8)
|
||||
#define IS_AMD_USE_HIP
|
||||
// TODO HIP optimization potential
|
||||
//#define IS_GENERIC
|
||||
#else
|
||||
#define IS_GENERIC
|
||||
#endif
|
||||
@ -158,10 +156,8 @@
|
||||
#endif
|
||||
|
||||
#ifdef IS_HIP
|
||||
//TODO HIP
|
||||
//#define USE_BITSELECT
|
||||
//#define USE_ROTATE
|
||||
//#define USE_SWIZZLE
|
||||
#define USE_BITSELECT
|
||||
#define USE_ROTATE
|
||||
#endif
|
||||
|
||||
#ifdef IS_ROCM
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -143,7 +143,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -89,7 +89,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -8339,18 +8339,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
device_param->has_mov64 = false;
|
||||
device_param->has_prmt = false;
|
||||
|
||||
device_param->has_vadd = true;
|
||||
device_param->has_vaddc = true;
|
||||
device_param->has_vadd_co = true;
|
||||
device_param->has_vaddc_co = true;
|
||||
device_param->has_vsub = true;
|
||||
device_param->has_vsubb = true;
|
||||
device_param->has_vsub_co = true;
|
||||
device_param->has_vsubb_co = true;
|
||||
device_param->has_vadd3 = true;
|
||||
device_param->has_vbfe = true;
|
||||
device_param->has_vperm = true;
|
||||
|
||||
// device_available_mem
|
||||
|
||||
HIPcontext hip_context;
|
||||
@ -9528,7 +9516,27 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
if (backend_ctx->hip)
|
||||
{
|
||||
// TODO HIP
|
||||
// TODO HIP?
|
||||
// Maybe all devices supported by hip have these instructions guaranteed?
|
||||
|
||||
for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
|
||||
{
|
||||
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
|
||||
|
||||
if (device_param->is_hip == false) continue;
|
||||
|
||||
device_param->has_vadd = true;
|
||||
device_param->has_vaddc = true;
|
||||
device_param->has_vadd_co = true;
|
||||
device_param->has_vaddc_co = true;
|
||||
device_param->has_vsub = true;
|
||||
device_param->has_vsubb = true;
|
||||
device_param->has_vsub_co = true;
|
||||
device_param->has_vsubb_co = true;
|
||||
device_param->has_vadd3 = true;
|
||||
device_param->has_vbfe = true;
|
||||
device_param->has_vperm = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->ocl)
|
||||
@ -10495,9 +10503,6 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
||||
|
||||
//hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
|
||||
|
||||
// TODO HIP
|
||||
// no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context
|
||||
|
||||
hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
||||
hiprtc_options[1] = "";
|
||||
hiprtc_options[2] = "";
|
||||
|
@ -13,7 +13,7 @@ my $amd_cache = "~/.AMD";
|
||||
my $hashcat_path = ".";
|
||||
my $kernels_cache = "$hashcat_path/kernels";
|
||||
my $hashcat_bin = "$hashcat_path/hashcat";
|
||||
my $device = 3;
|
||||
my $device = 1;
|
||||
my $workload_profile = 3;
|
||||
my $runtime = 24;
|
||||
my $sleep_sec = 12;
|
||||
|
Loading…
Reference in New Issue
Block a user