From 8674e23d7963e55f3c2fed3c86fe1fc34bf578c7 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Tue, 20 Jul 2021 12:47:10 +0300 Subject: [PATCH] Add async HIP memcpy functions: hc_hipMemcpyDtoDAsync(), hc_hipMemcpyDtoHAsync() and hc_hipMemcpyHtoDAsync(). Implement partially async HIP memset and bzero kernels. --- include/ext_hip.h | 8 +++- src/backend.c | 106 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 101 insertions(+), 13 deletions(-) diff --git a/include/ext_hip.h b/include/ext_hip.h index 15840d671..a99d1e5a9 100644 --- a/include/ext_hip.h +++ b/include/ext_hip.h @@ -1038,8 +1038,11 @@ typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (HIPfunction, un typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC) (HIPdeviceptr *, size_t); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST) (void **, size_t); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD) (HIPdeviceptr, HIPdeviceptr, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC) (HIPdeviceptr, HIPdeviceptr, size_t, HIPstream); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH) (void *, HIPdeviceptr, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC) (void *, HIPdeviceptr, size_t, HIPstream); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD) (HIPdeviceptr, const void *, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (HIPdeviceptr, const void *, size_t, HIPstream); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE) (HIPdeviceptr); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST) (void *); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *); @@ -1100,8 +1103,11 @@ typedef struct hc_hip_lib HIP_HIPMEMALLOC hipMemAlloc; HIP_HIPMEMALLOCHOST hipMemAllocHost; HIP_HIPMEMCPYDTOD hipMemcpyDtoD; + HIP_HIPMEMCPYDTODASYNC hipMemcpyDtoDAsync; HIP_HIPMEMCPYDTOH hipMemcpyDtoH; + HIP_HIPMEMCPYDTOHASYNC hipMemcpyDtoHAsync; HIP_HIPMEMCPYHTOD hipMemcpyHtoD; + HIP_HIPMEMCPYHTODASYNC hipMemcpyHtoDAsync; HIP_HIPMEMFREE hipMemFree; HIP_HIPMEMFREEHOST hipMemFreeHost; HIP_HIPMEMGETINFO hipMemGetInfo; @@ -1128,4 +1134,4 @@ typedef struct hc_hip_lib typedef hc_hip_lib_t HIP_PTR; -#endif // _EXT_HIP_H \ No newline at end of file +#endif // _EXT_HIP_H diff --git a/src/backend.c b/src/backend.c index 0277b2d5f..d1c6e9254 100644 --- a/src/backend.c +++ b/src/backend.c @@ -2548,8 +2548,11 @@ int hip_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemAllocHost, hipMemAllocHost, HIP_HIPMEMALLOCHOST, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, HIP_HIPMEMCPYDTODASYNC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, HIP_HIPMEMCPYDTOHASYNC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); @@ -3001,6 +3004,33 @@ int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr sr return 0; } +int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -3028,6 +3058,33 @@ int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdev return 0; } +int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -3055,6 +3112,33 @@ int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const return 0; } +int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -4967,7 +5051,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (device_param->is_hip == true) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; } if (device_param->is_opencl == true) @@ -4985,7 +5069,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (device_param->is_hip == true) { - if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } if (device_param->is_opencl == true) @@ -5204,6 +5288,7 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device { const u64 num16d = size / 16; const u64 num16m = size % 16; + u32 tmp[4]; if (num16d) { @@ -5218,22 +5303,20 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device HIPfunction function = device_param->hip_function_memset; if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1; - - if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } if (num16m) { - u32 tmp[4]; - tmp[0] = value; tmp[1] = value; tmp[2] = value; tmp[3] = value; - if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->hip_stream) == -1) return -1; } + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + return 0; } @@ -5241,6 +5324,7 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_ { const u64 num16d = size / 16; const u64 num16m = size % 16; + u32 tmp[4]; if (num16d) { @@ -5254,22 +5338,20 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_ HIPfunction function = device_param->hip_function_bzero; if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_bzero, NULL) == -1) return -1; - - if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; } if (num16m) { - u32 tmp[4]; - tmp[0] = 0; tmp[1] = 0; tmp[2] = 0; tmp[3] = 0; - if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return -1; + if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->hip_stream) == -1) return -1; } + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + return 0; }