Add async HIP memcpy functions: hc_hipMemcpyDtoDAsync(), hc_hipMemcpyDtoHAsync() and hc_hipMemcpyHtoDAsync(). Implement partially async HIP memset and bzero kernels.

pull/2879/head
Jukka Ojanen 3 years ago
parent 4263cafdcf
commit 8674e23d79

@ -1038,8 +1038,11 @@ typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (HIPfunction, un
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC) (HIPdeviceptr *, size_t);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST) (void **, size_t);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD) (HIPdeviceptr, HIPdeviceptr, size_t);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC) (HIPdeviceptr, HIPdeviceptr, size_t, HIPstream);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH) (void *, HIPdeviceptr, size_t);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC) (void *, HIPdeviceptr, size_t, HIPstream);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD) (HIPdeviceptr, const void *, size_t);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (HIPdeviceptr, const void *, size_t, HIPstream);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE) (HIPdeviceptr);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST) (void *);
typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *);
@ -1100,8 +1103,11 @@ typedef struct hc_hip_lib
HIP_HIPMEMALLOC hipMemAlloc;
HIP_HIPMEMALLOCHOST hipMemAllocHost;
HIP_HIPMEMCPYDTOD hipMemcpyDtoD;
HIP_HIPMEMCPYDTODASYNC hipMemcpyDtoDAsync;
HIP_HIPMEMCPYDTOH hipMemcpyDtoH;
HIP_HIPMEMCPYDTOHASYNC hipMemcpyDtoHAsync;
HIP_HIPMEMCPYHTOD hipMemcpyHtoD;
HIP_HIPMEMCPYHTODASYNC hipMemcpyHtoDAsync;
HIP_HIPMEMFREE hipMemFree;
HIP_HIPMEMFREEHOST hipMemFreeHost;
HIP_HIPMEMGETINFO hipMemGetInfo;
@ -1128,4 +1134,4 @@ typedef struct hc_hip_lib
typedef hc_hip_lib_t HIP_PTR;
#endif // _EXT_HIP_H
#endif // _EXT_HIP_H

@ -2548,8 +2548,11 @@ int hip_init (hashcat_ctx_t *hashcat_ctx)
HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemAllocHost, hipMemAllocHost, HIP_HIPMEMALLOCHOST, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, HIP_HIPMEMCPYDTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, HIP_HIPMEMCPYDTOHASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1);
@ -3001,6 +3004,33 @@ int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr sr
return 0;
}
int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const HIPresult HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream);
if (HIP_err != HIP_SUCCESS)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
{
event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@ -3028,6 +3058,33 @@ int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdev
return 0;
}
int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const HIPresult HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream);
if (HIP_err != HIP_SUCCESS)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
{
event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@ -3055,6 +3112,33 @@ int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const
return 0;
}
int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount, HIPstream hStream)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const HIPresult HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream);
if (HIP_err != HIP_SUCCESS)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS)
{
event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@ -4967,7 +5051,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if (device_param->is_hip == true)
{
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1;
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1;
}
if (device_param->is_opencl == true)
@ -4985,7 +5069,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if (device_param->is_hip == true)
{
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
}
if (device_param->is_opencl == true)
@ -5204,6 +5288,7 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
{
const u64 num16d = size / 16;
const u64 num16m = size % 16;
u32 tmp[4];
if (num16d)
{
@ -5218,22 +5303,20 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
HIPfunction function = device_param->hip_function_memset;
if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
}
if (num16m)
{
u32 tmp[4];
tmp[0] = value;
tmp[1] = value;
tmp[2] = value;
tmp[3] = value;
if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return -1;
if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->hip_stream) == -1) return -1;
}
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
return 0;
}
@ -5241,6 +5324,7 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
{
const u64 num16d = size / 16;
const u64 num16m = size % 16;
u32 tmp[4];
if (num16d)
{
@ -5254,22 +5338,20 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
HIPfunction function = device_param->hip_function_bzero;
if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_bzero, NULL) == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
}
if (num16m)
{
u32 tmp[4];
tmp[0] = 0;
tmp[1] = 0;
tmp[2] = 0;
tmp[3] = 0;
if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return -1;
if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->hip_stream) == -1) return -1;
}
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
return 0;
}

Loading…
Cancel
Save