mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-08 23:01:14 +00:00
Cache cubin instead of PTX to decrease startup time
This commit is contained in:
parent
cc4fd48ace
commit
66ae5125ce
@ -75,6 +75,10 @@ int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
|
||||
int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
|
||||
int hc_cuCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
|
||||
int hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx);
|
||||
int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
|
||||
int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
|
||||
int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
|
||||
|
||||
int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
|
||||
int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
|
||||
|
@ -32,6 +32,7 @@ typedef struct CUevent_st *CUevent; /**< CUDA event */
|
||||
typedef struct CUfunc_st *CUfunction; /**< CUDA function */
|
||||
typedef struct CUmod_st *CUmodule; /**< CUDA module */
|
||||
typedef struct CUstream_st *CUstream; /**< CUDA stream */
|
||||
typedef struct CUlinkState_st *CUlinkState;
|
||||
|
||||
typedef enum cudaError_enum {
|
||||
/**
|
||||
@ -951,6 +952,41 @@ typedef enum CUevent_flags_enum {
|
||||
CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set */
|
||||
} CUevent_flags;
|
||||
|
||||
/**
 * Kinds of device-code input that can be handed to the CUDA linker
 * (see the cuLinkAddData function pointer type, which takes a CUjitInputType).
 * Values are sequential starting at 0; CU_JIT_NUM_INPUT_TYPES is the count.
 */
typedef enum CUjitInputType_enum
{
  CU_JIT_INPUT_CUBIN     = 0, /**< Compiled device-class-specific device code. Applicable options: none */
  CU_JIT_INPUT_PTX       = 1, /**< PTX source code. Applicable options: PTX compiler options */
  CU_JIT_INPUT_FATBINARY = 2, /**< Bundle of multiple cubins and/or PTX of some device code. Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY */
  CU_JIT_INPUT_OBJECT    = 3, /**< Host object with embedded device code. Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY */
  CU_JIT_INPUT_LIBRARY   = 4, /**< Archive of host objects with embedded device code. Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY */

  CU_JIT_NUM_INPUT_TYPES = 5  /**< Number of input types listed above (not itself an input type) */

} CUjitInputType;
|
||||
|
||||
#ifdef _WIN32
|
||||
#define CUDAAPI __stdcall
|
||||
#else
|
||||
@ -1012,6 +1048,10 @@ typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMCREATE) (CUstream *, uns
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMDESTROY) (CUstream);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMSYNCHRONIZE) (CUstream);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CUSTREAMWAITEVENT) (CUstream, CUevent, unsigned int);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CULINKCREATE) (unsigned int, CUjit_option *, void **, CUlinkState *);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CULINKADDDATA) (CUlinkState, CUjitInputType, void *, size_t, const char *, unsigned int, CUjit_option *, void **);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CULINKDESTROY) (CUlinkState);
|
||||
typedef CUresult (CUDA_API_CALL *CUDA_CULINKCOMPLETE) (CUlinkState, void **, size_t *);
|
||||
|
||||
typedef struct hc_cuda_lib
|
||||
{
|
||||
@ -1070,6 +1110,10 @@ typedef struct hc_cuda_lib
|
||||
CUDA_CUSTREAMDESTROY cuStreamDestroy;
|
||||
CUDA_CUSTREAMSYNCHRONIZE cuStreamSynchronize;
|
||||
CUDA_CUSTREAMWAITEVENT cuStreamWaitEvent;
|
||||
CUDA_CULINKCREATE cuLinkCreate;
|
||||
CUDA_CULINKADDDATA cuLinkAddData;
|
||||
CUDA_CULINKDESTROY cuLinkDestroy;
|
||||
CUDA_CULINKCOMPLETE cuLinkComplete;
|
||||
|
||||
} hc_cuda_lib_t;
|
||||
|
||||
|
212
src/backend.c
212
src/backend.c
@ -998,6 +998,10 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuStreamDestroy, cuStreamDestroy_v2, CUDA_CUSTREAMDESTROY, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuStreamSynchronize, cuStreamSynchronize, CUDA_CUSTREAMSYNCHRONIZE, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuStreamWaitEvent, cuStreamWaitEvent, CUDA_CUSTREAMWAITEVENT, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuLinkCreate, cuLinkCreate_v2, CUDA_CULINKCREATE, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuLinkAddData, cuLinkAddData_v2, CUDA_CULINKADDDATA, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuLinkDestroy, cuLinkDestroy, CUDA_CULINKDESTROY, CUDA, 1);
|
||||
HC_LOAD_FUNC_CUDA (cuda, cuLinkComplete, cuLinkComplete, CUDA_CULINKCOMPLETE, CUDA, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2040,6 +2044,113 @@ int hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Calls the dynamically loaded cuLinkCreate entry point and reports failures.
 *
 * All arguments after hashcat_ctx are forwarded unchanged to the driver call.
 * On failure the error is logged through event_log_error(): the text from
 * cuGetErrorString() when that lookup succeeds, otherwise the numeric code.
 *
 * @return 0 on CUDA_SUCCESS, -1 on any other result.
 */
int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut)
{
  CUDA_PTR *cuda = (CUDA_PTR *) hashcat_ctx->backend_ctx->cuda;

  const CUresult rc = cuda->cuLinkCreate (numOptions, options, optionValues, stateOut);

  if (rc == CUDA_SUCCESS) return 0;

  // failure path: prefer the driver's message, fall back to the raw code
  const char *err_text = NULL;

  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuLinkCreate(): %d", rc);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuLinkCreate(): %s", err_text);
  }

  return -1;
}
|
||||
|
||||
/**
 * Calls the dynamically loaded cuLinkAddData entry point and reports failures.
 *
 * All arguments after hashcat_ctx are forwarded unchanged to the driver call.
 * On failure the error is logged through event_log_error(): the text from
 * cuGetErrorString() when that lookup succeeds, otherwise the numeric code.
 *
 * @return 0 on CUDA_SUCCESS, -1 on any other result.
 */
int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues)
{
  CUDA_PTR *cuda = (CUDA_PTR *) hashcat_ctx->backend_ctx->cuda;

  const CUresult rc = cuda->cuLinkAddData (state, type, data, size, name, numOptions, options, optionValues);

  if (rc == CUDA_SUCCESS) return 0;

  // failure path: prefer the driver's message, fall back to the raw code
  const char *err_text = NULL;

  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuLinkAddData(): %d", rc);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuLinkAddData(): %s", err_text);
  }

  return -1;
}
|
||||
|
||||
/**
 * Calls the dynamically loaded cuLinkDestroy entry point and reports failures.
 *
 * The link state is forwarded unchanged to the driver call.
 * On failure the error is logged through event_log_error(): the text from
 * cuGetErrorString() when that lookup succeeds, otherwise the numeric code.
 *
 * @return 0 on CUDA_SUCCESS, -1 on any other result.
 */
int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state)
{
  CUDA_PTR *cuda = (CUDA_PTR *) hashcat_ctx->backend_ctx->cuda;

  const CUresult rc = cuda->cuLinkDestroy (state);

  if (rc == CUDA_SUCCESS) return 0;

  // failure path: prefer the driver's message, fall back to the raw code
  const char *err_text = NULL;

  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuLinkDestroy(): %d", rc);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuLinkDestroy(): %s", err_text);
  }

  return -1;
}
|
||||
|
||||
/**
 * Calls the dynamically loaded cuLinkComplete entry point and reports failures.
 *
 * state, cubinOut and sizeOut are forwarded unchanged to the driver call.
 * On failure the error is logged through event_log_error(): the text from
 * cuGetErrorString() when that lookup succeeds, otherwise the numeric code.
 *
 * @return 0 on CUDA_SUCCESS, -1 on any other result.
 */
int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut)
{
  CUDA_PTR *cuda = (CUDA_PTR *) hashcat_ctx->backend_ctx->cuda;

  const CUresult rc = cuda->cuLinkComplete (state, cubinOut, sizeOut);

  if (rc == CUDA_SUCCESS) return 0;

  // failure path: prefer the driver's message, fall back to the raw code
  const char *err_text = NULL;

  if (cuda->cuGetErrorString (rc, &err_text) != CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuLinkComplete(): %d", rc);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuLinkComplete(): %s", err_text);
  }

  return -1;
}
|
||||
|
||||
// OpenCL
|
||||
|
||||
@ -7438,18 +7549,41 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return -1;
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, binary);
|
||||
CUlinkState state;
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
if (hc_cuLinkCreate (hashcat_ctx, 0, NULL, NULL, &state) == -1) return -1;
|
||||
|
||||
if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, "kernel", 0, NULL, NULL) == -1) return -1;
|
||||
|
||||
void *cubin = NULL;
|
||||
|
||||
size_t cubin_size = 0;
|
||||
|
||||
if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) return -1;
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, binary) == -1) return -1;
|
||||
|
||||
if (cache_disable == false)
|
||||
{
|
||||
const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
|
||||
|
||||
if (rc_write == false) return -1;
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return -1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, cubin) == -1) return -1;
|
||||
|
||||
if (cache_disable == false)
|
||||
{
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
hcfree (binary);
|
||||
|
||||
if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
@ -7662,20 +7796,41 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return -1;
|
||||
|
||||
// tbd: check for some useful options
|
||||
CUlinkState state;
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, binary);
|
||||
if (hc_cuLinkCreate (hashcat_ctx, 0, NULL, NULL, &state) == -1) return -1;
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
if (hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, "mp_kernel", 0, NULL, NULL) == -1) return -1;
|
||||
|
||||
void *cubin = NULL;
|
||||
|
||||
size_t cubin_size = 0;
|
||||
|
||||
if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) return -1;
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, binary) == -1) return -1;
|
||||
|
||||
if (cache_disable == false)
|
||||
{
|
||||
const bool rc_write = write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size);
|
||||
|
||||
if (rc_write == false) return -1;
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return -1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, cubin) == -1) return -1;
|
||||
|
||||
if (cache_disable == false)
|
||||
{
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
hcfree (binary);
|
||||
|
||||
if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
@ -7836,7 +7991,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
{
|
||||
nvrtcProgram program;
|
||||
|
||||
if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], "mp_kernel", 0, NULL, NULL) == -1) return -1;
|
||||
if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], "amp_kernel", 0, NULL, NULL) == -1) return -1;
|
||||
|
||||
char **nvrtc_options = (char **) hccalloc (4 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
|
||||
|
||||
@ -7893,7 +8048,25 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (hc_nvrtcDestroyProgram (hashcat_ctx, &program) == -1) return -1;
|
||||
|
||||
// tbd: check for some useful options
|
||||
CUlinkState state;
|
||||
|
||||
const int rc_cuLinkCreate = hc_cuLinkCreate (hashcat_ctx, 0, NULL, NULL, &state);
|
||||
|
||||
if (rc_cuLinkCreate == -1) return -1;
|
||||
|
||||
const int rc_cuLinkAddData = hc_cuLinkAddData (hashcat_ctx, state, CU_JIT_INPUT_PTX, binary, binary_size, "kernel_amp", 0, NULL, NULL);
|
||||
|
||||
if (rc_cuLinkAddData == -1) return -1;
|
||||
|
||||
void *cubin = NULL;
|
||||
|
||||
size_t cubin_size = 0;
|
||||
|
||||
const int rc_cuLinkComplete = hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size);
|
||||
|
||||
if (rc_cuLinkComplete == -1) return -1;
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, binary) == -1) return -1;
|
||||
|
||||
@ -7902,7 +8075,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return -1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, cubin) == -1) return -1;
|
||||
|
||||
if (cache_disable == false)
|
||||
{
|
||||
if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
hcfree (binary);
|
||||
|
||||
if (hc_cuLinkDestroy (hashcat_ctx, state) == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
|
Loading…
Reference in New Issue
Block a user