mirror of
https://github.com/hashcat/hashcat.git
synced 2025-03-04 17:16:05 +00:00
Add GPU threads to kernel cache checksum because it has an influence on HIP offline compile options
Add V_ALIGNBIT_B32 inline assembly wrapper because HIP does not provide amd_bitalign()
This commit is contained in:
parent
23c3c178bf
commit
674ca7d88f
@ -246,22 +246,55 @@ DECLSPEC u32 rotr32_S (const u32 a, const int n)
|
|||||||
|
|
||||||
DECLSPEC u64x rotl64 (const u64x a, const int n)
|
DECLSPEC u64x rotl64 (const u64x a, const int n)
|
||||||
{
|
{
|
||||||
return ((a << n) | ((a >> (64 - n))));
|
return rotr64 (a, 64 - n);
|
||||||
|
}
|
||||||
|
|
||||||
|
DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n)
|
||||||
|
{
|
||||||
|
u32 r = 0;
|
||||||
|
|
||||||
|
__asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "v"(n));
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC u64x rotr64 (const u64x a, const int n)
|
DECLSPEC u64x rotr64 (const u64x a, const int n)
|
||||||
{
|
{
|
||||||
|
#if VECT_SIZE == 1
|
||||||
|
return rotr64_S (a, n);
|
||||||
|
#else
|
||||||
return ((a >> n) | ((a << (64 - n))));
|
return ((a >> n) | ((a << (64 - n))));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC u64 rotl64_S (const u64 a, const int n)
|
DECLSPEC u64 rotl64_S (const u64 a, const int n)
|
||||||
{
|
{
|
||||||
return ((a << n) | ((a >> (64 - n))));
|
return rotr64_S (a, 64 - n);
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
||||||
{
|
{
|
||||||
return ((a >> n) | ((a << (64 - n))));
|
vconv64_t in;
|
||||||
|
|
||||||
|
in.v64 = a;
|
||||||
|
|
||||||
|
const u32 a0 = in.v32.a;
|
||||||
|
const u32 a1 = in.v32.b;
|
||||||
|
|
||||||
|
vconv64_t out;
|
||||||
|
|
||||||
|
if (n < 32)
|
||||||
|
{
|
||||||
|
out.v32.a = amd_bitalign_S (a1, a0, n);
|
||||||
|
out.v32.b = amd_bitalign_S (a0, a1, n);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
out.v32.a = amd_bitalign_S (a0, a1, n - 32);
|
||||||
|
out.v32.b = amd_bitalign_S (a1, a0, n - 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
return out.v64;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
|
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
|
||||||
|
@ -10498,8 +10498,9 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
|||||||
// TODO HIP
|
// TODO HIP
|
||||||
// no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context
|
// no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context
|
||||||
|
|
||||||
hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%u", device_param->kernel_threads);
|
||||||
hiprtc_options[1] = "";
|
|
||||||
|
hiprtc_options[1] = "-O3";
|
||||||
hiprtc_options[2] = "";
|
hiprtc_options[2] = "";
|
||||||
hiprtc_options[3] = "";
|
hiprtc_options[3] = "";
|
||||||
|
|
||||||
@ -11588,7 +11589,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
|
char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
|
||||||
|
|
||||||
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s",
|
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d",
|
||||||
backend_ctx->comptime,
|
backend_ctx->comptime,
|
||||||
backend_ctx->cuda_driver_version,
|
backend_ctx->cuda_driver_version,
|
||||||
backend_ctx->hip_driver_version,
|
backend_ctx->hip_driver_version,
|
||||||
@ -11596,7 +11597,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->opencl_platform_vendor_id,
|
device_param->opencl_platform_vendor_id,
|
||||||
device_param->device_name,
|
device_param->device_name,
|
||||||
device_param->opencl_device_version,
|
device_param->opencl_device_version,
|
||||||
device_param->opencl_driver_version);
|
device_param->opencl_driver_version,
|
||||||
|
device_param->kernel_threads);
|
||||||
|
|
||||||
md5_ctx_t md5_ctx;
|
md5_ctx_t md5_ctx;
|
||||||
|
|
||||||
@ -11887,7 +11889,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
|
const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
|
||||||
|
|
||||||
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s",
|
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%d-%u-%s",
|
||||||
backend_ctx->comptime,
|
backend_ctx->comptime,
|
||||||
backend_ctx->cuda_driver_version,
|
backend_ctx->cuda_driver_version,
|
||||||
backend_ctx->hip_driver_version,
|
backend_ctx->hip_driver_version,
|
||||||
@ -11897,6 +11899,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->opencl_device_version,
|
device_param->opencl_device_version,
|
||||||
device_param->opencl_driver_version,
|
device_param->opencl_driver_version,
|
||||||
device_param->vector_width,
|
device_param->vector_width,
|
||||||
|
device_param->kernel_threads,
|
||||||
hashconfig->kern_type,
|
hashconfig->kern_type,
|
||||||
extra_value,
|
extra_value,
|
||||||
build_options_module_buf);
|
build_options_module_buf);
|
||||||
|
Loading…
Reference in New Issue
Block a user