diff --git a/include/backend.h b/include/backend.h index a54ba357e..c2c984b24 100644 --- a/include/backend.h +++ b/include/backend.h @@ -72,6 +72,8 @@ int hc_cuMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDev int hc_cuMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream); int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr); +int hc_cuMemsetD32Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream); +int hc_cuMemsetD8Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream); int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name); int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod); @@ -120,6 +122,8 @@ int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDe int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream); int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount, HIPstream hStream); int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr); +int hc_hipMemsetD32Async (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, unsigned int ui, size_t N, HIPstream hStream); +int hc_hipMemsetD8Async (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, unsigned char uc, size_t N, HIPstream hStream); int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name); int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues); int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod); @@ -142,6 +146,7 @@ int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_program *program); int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program); int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); +int hc_clEnqueueFillBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf); int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); @@ -177,17 +182,20 @@ void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 p int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); -int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size); +int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size); +int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size); int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size); int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); -int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size); +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 offset, const u8 value, const u64 size); +int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 offset, const u32 value, const u64 size); int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size); int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); -int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size); +int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size); +int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size); int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size); int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration); diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h index 42e37deed..40f3515ed 100644 --- a/include/ext_OpenCL.h +++ b/include/ext_OpenCL.h @@ -46,6 +46,7 @@ typedef cl_context (CL_API_CALL *OCL_CLCREATECONTEXT) (const cl_ typedef cl_kernel (CL_API_CALL *OCL_CLCREATEKERNEL) (cl_program, const char *, cl_int *); typedef cl_program (CL_API_CALL *OCL_CLCREATEPROGRAMWITHBINARY) (cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *); typedef cl_program (CL_API_CALL *OCL_CLCREATEPROGRAMWITHSOURCE) (cl_context, cl_uint, const char **, const size_t *, cl_int *); +typedef cl_int (CL_API_CALL *OCL_CLENQUEUEFILLBUFFER) (cl_command_queue, cl_mem, const void *, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *); typedef cl_int (CL_API_CALL *OCL_CLENQUEUECOPYBUFFER) (cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *); typedef void * (CL_API_CALL *OCL_CLENQUEUEMAPBUFFER) (cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event *, cl_event *, cl_int *); typedef cl_int (CL_API_CALL *OCL_CLENQUEUENDRANGEKERNEL) (cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *); @@ -87,6 +88,7 @@ typedef struct hc_opencl_lib OCL_CLCREATEPROGRAMWITHBINARY clCreateProgramWithBinary; OCL_CLCREATEPROGRAMWITHSOURCE clCreateProgramWithSource; OCL_CLENQUEUECOPYBUFFER clEnqueueCopyBuffer; + OCL_CLENQUEUEFILLBUFFER clEnqueueFillBuffer; OCL_CLENQUEUEMAPBUFFER clEnqueueMapBuffer; OCL_CLENQUEUENDRANGEKERNEL clEnqueueNDRangeKernel; OCL_CLENQUEUEREADBUFFER clEnqueueReadBuffer; diff --git a/include/ext_cuda.h b/include/ext_cuda.h index 7f40e1410..11ed86048 100644 --- a/include/ext_cuda.h +++ b/include/ext_cuda.h @@ -1034,8 +1034,8 @@ typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTODASYNC) (CUdeviceptr, co typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE) (CUdeviceptr); typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST) (void *); typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO) (size_t *, size_t *); -typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32) (CUdeviceptr, unsigned int, size_t); -typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8) (CUdeviceptr, unsigned char, size_t); +typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC) (CUdeviceptr, unsigned int, size_t, CUstream); +typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC) (CUdeviceptr, unsigned char, size_t, CUstream); typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION) (CUfunction *, CUmodule, const char *); typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL) (CUdeviceptr *, size_t *, CUmodule, const char *); typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD) (CUmodule *, const char *); @@ -1096,8 +1096,8 @@ typedef struct hc_cuda_lib CUDA_CUMEMFREE cuMemFree; CUDA_CUMEMFREEHOST cuMemFreeHost; CUDA_CUMEMGETINFO cuMemGetInfo; - CUDA_CUMEMSETD32 cuMemsetD32; - CUDA_CUMEMSETD8 cuMemsetD8; + CUDA_CUMEMSETD32ASYNC cuMemsetD32Async; + CUDA_CUMEMSETD8ASYNC cuMemsetD8Async; CUDA_CUMODULEGETFUNCTION cuModuleGetFunction; CUDA_CUMODULEGETGLOBAL cuModuleGetGlobal; CUDA_CUMODULELOAD cuModuleLoad; diff --git a/include/ext_hip.h b/include/ext_hip.h index 631dbfb33..ee344a80b 100644 --- a/include/ext_hip.h +++ b/include/ext_hip.h @@ -1043,8 +1043,8 @@ typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (HIPdeviceptr, c typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE) (HIPdeviceptr); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST) (void *); typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32) (HIPdeviceptr, unsigned int, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8) (HIPdeviceptr, unsigned char, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32ASYNC) (HIPdeviceptr, unsigned int, size_t, HIPstream); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8ASYNC) (HIPdeviceptr, unsigned char, size_t, HIPstream); typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION) (HIPfunction *, HIPmodule, const char *); typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (HIPdeviceptr *, size_t *, HIPmodule, const char *); typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOAD) (HIPmodule *, const char *); @@ -1105,8 +1105,8 @@ typedef struct hc_hip_lib HIP_HIPMEMFREE hipMemFree; HIP_HIPMEMFREEHOST hipMemFreeHost; HIP_HIPMEMGETINFO hipMemGetInfo; - HIP_HIPMEMSETD32 hipMemsetD32; - HIP_HIPMEMSETD8 hipMemsetD8; + HIP_HIPMEMSETD32ASYNC hipMemsetD32Async; + HIP_HIPMEMSETD8ASYNC hipMemsetD8Async; HIP_HIPMODULEGETFUNCTION hipModuleGetFunction; HIP_HIPMODULEGETGLOBAL hipModuleGetGlobal; HIP_HIPMODULELOAD hipModuleLoad; diff --git a/src/backend.c b/src/backend.c index 73edce1ce..69b6345c6 100644 --- a/src/backend.c +++ b/src/backend.c @@ -1263,8 +1263,8 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC_CUDA (cuda, cuMemFree, cuMemFree_v2, CUDA_CUMEMFREE, CUDA, 1); HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost, cuMemFreeHost, CUDA_CUMEMFREEHOST, CUDA, 1); HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo, cuMemGetInfo_v2, CUDA_CUMEMGETINFO, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32, cuMemsetD32_v2, CUDA_CUMEMSETD32, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8, cuMemsetD8_v2, CUDA_CUMEMSETD8, CUDA, 1); + HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async, cuMemsetD32Async, CUDA_CUMEMSETD32ASYNC, CUDA, 1); + HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async, cuMemsetD8Async, CUDA_CUMEMSETD8ASYNC, CUDA, 1); HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction, cuModuleGetFunction, CUDA_CUMODULEGETFUNCTION, CUDA, 1); HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal, cuModuleGetGlobal_v2, CUDA_CUMODULEGETGLOBAL, CUDA, 1); HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad, cuModuleLoad, CUDA_CUMODULELOAD, CUDA, 1); @@ -1765,6 +1765,60 @@ int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, con return 0; } +int hc_cuMemsetD32Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemsetD32Async (dstDevice, ui, N, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemsetD32Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemsetD32Async(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemsetD8Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemsetD8Async (dstDevice, uc, N, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemsetD8Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemsetD8Async(): %d", CU_err); + } + + return -1; + } + + return 0; +} + int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2472,8 +2526,8 @@ int hip_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async, hipMemsetD32Async, HIP_HIPMEMSETD32ASYNC, HIP, -1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async, hipMemsetD8Async, HIP_HIPMEMSETD8ASYNC, HIP, -1); HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1); @@ -2975,6 +3029,60 @@ int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, c return 0; } +int hc_hipMemsetD32Async (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, unsigned int ui, size_t N, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemsetD32Async (dstDevice, ui, N, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemsetD32Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemsetD32Async(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemsetD8Async (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, unsigned char uc, size_t N, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemsetD8Async (dstDevice, uc, N, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemsetD8Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemsetD8Async(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -3636,6 +3744,7 @@ int ocl_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC (ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY, OpenCL, 1); HC_LOAD_FUNC (ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE, OpenCL, 1); HC_LOAD_FUNC (ocl, clEnqueueCopyBuffer, OCL_CLENQUEUECOPYBUFFER, OpenCL, 1); + HC_LOAD_FUNC (ocl, clEnqueueFillBuffer, OCL_CLENQUEUEFILLBUFFER, OpenCL, -1); HC_LOAD_FUNC (ocl, clEnqueueMapBuffer, OCL_CLENQUEUEMAPBUFFER, OpenCL, 1); HC_LOAD_FUNC (ocl, clEnqueueNDRangeKernel, OCL_CLENQUEUENDRANGEKERNEL, OpenCL, 1); HC_LOAD_FUNC (ocl, clEnqueueReadBuffer, OCL_CLENQUEUEREADBUFFER, OpenCL, 1); @@ -3811,6 +3920,23 @@ int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command return 0; } +int hc_clEnqueueFillBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) +{ + const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + const OCL_PTR *ocl = backend_ctx->ocl; + + cl_int CL_err = ocl->clEnqueueFillBuffer (command_queue, buffer, pattern, pattern_size, offset, size, num_events_in_wait_list, event_wait_list, event); + + if (CL_err != CL_SUCCESS) + { + event_log_error (hashcat_ctx, "clEnqueueFillBuffer(): %s", val2cstr_cl (CL_err)); + + return -1; + } + + return 0; +} + int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -5013,40 +5139,14 @@ int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t return 0; } -int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size) +int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size) { - const u64 num16d = size / 16; - const u64 num16m = size % 16; - u32 tmp[4]; - - if (num16d) - { - device_param->kernel_params_memset[0] = (void *) &buf; - device_param->kernel_params_memset_buf32[1] = value; - device_param->kernel_params_memset_buf64[2] = num16d; - - const u64 kernel_threads = device_param->kernel_wgs_memset; - - u64 num_elements = CEILDIV (num16d, kernel_threads); - - CUfunction function = device_param->cuda_function_memset; - - if (hc_cuLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->cuda_stream, device_param->kernel_params_memset, NULL) == -1) return -1; - } - - if (num16m) - { - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; - - if (hc_cuMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->cuda_stream) == -1) return -1; - } - - if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; + return hc_cuMemsetD8Async (hashcat_ctx, buf + offset, value, size, device_param->cuda_stream); +} - return 0; +int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size) +{ + return hc_cuMemsetD32Async (hashcat_ctx, buf + offset * sizeof (u32), value, size, device_param->cuda_stream); } int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size) @@ -5112,40 +5212,14 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t return 0; } -int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size) +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 offset, const u8 value, const u64 size) { - const u64 num16d = size / 16; - const u64 num16m = size % 16; - u32 tmp[4]; - - if (num16d) - { - device_param->kernel_params_memset[0] = (void *) &buf; - device_param->kernel_params_memset_buf32[1] = value; - device_param->kernel_params_memset_buf64[2] = num16d; - - const u64 kernel_threads = device_param->kernel_wgs_memset; - - u64 num_elements = CEILDIV (num16d, kernel_threads); - - HIPfunction function = device_param->hip_function_memset; - - if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1; - } - - if (num16m) - { - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; - - if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), tmp, num16m, device_param->hip_stream) == -1) return -1; - } - - if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + return hc_hipMemsetD8Async (hashcat_ctx, buf + offset, value, size, device_param->hip_stream); +} - return 0; +int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 offset, const u32 value, const u64 size) +{ + return hc_hipMemsetD32Async (hashcat_ctx, buf + offset * sizeof (u32), value, size, device_param->hip_stream); } int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size) @@ -5227,45 +5301,65 @@ int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param return 0; } -int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size) +int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size) { - const u64 num16d = size / 16; - const u64 num16m = size % 16; - u32 tmp[4]; + const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + const OCL_PTR *ocl = backend_ctx->ocl; - if (num16d) + int rc; + + /* workaround if missing clEnqueueFillBuffer() */ + if (ocl->clEnqueueFillBuffer == NULL) { - const u64 kernel_threads = device_param->kernel_wgs_memset; + const u64 len = offset + size; - u64 num_elements = round_up_multiple_64 (num16d, kernel_threads); + char *tmp = hcmalloc (len * sizeof (u8)); - cl_kernel kernel = device_param->opencl_kernel_memset; + memset(tmp, value, len); - if (hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof(cl_mem), (void *) &buf) == -1) return -1; - if (hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof(cl_uint), (void *) &value) == -1) return -1; - if (hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof(cl_ulong), (void *) &num16d) == -1) return -1; + /* blocking */ + rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, offset, size, tmp, 0, NULL, NULL); - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL) == -1) return -1; + free(tmp); + } + else + { + rc = hc_clEnqueueFillBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, &value, sizeof (u8), offset, size, 0, NULL, NULL); } - if (num16m) + return rc; +} + +int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size) +{ + const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + const OCL_PTR *ocl = backend_ctx->ocl; + + int rc; + + /* workaround if missing clEnqueueFillBuffer() */ + if (ocl->clEnqueueFillBuffer == NULL) { - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; + const u64 len = offset + size; - if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_FALSE, num16d * 16, num16m, tmp, 0, NULL, NULL) == -1) return -1; - } + u32 *tmp = (u32 *) hcmalloc ((offset + size) * sizeof (u32)); - /*if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;*/ + for (u64 i = 0; i < len; i++) + { + tmp[i] = value; + } - if (hc_clFinish (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1; + /* blocking */ + rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, offset * sizeof (u32), size * sizeof (u32), tmp, 0, NULL, NULL); - return 0; + free(tmp); + } + else + { + rc = hc_clEnqueueFillBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, &value, sizeof (u32), offset, size, 0, NULL, NULL); + } + + return rc; } int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)