diff --git a/include/backend.h b/include/backend.h index 057edb87f..e2ea51fb7 100644 --- a/include/backend.h +++ b/include/backend.h @@ -42,6 +42,7 @@ int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev); int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx); int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx); +int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config); int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx); int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev); int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count); @@ -56,6 +57,7 @@ int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent); int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream); int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent); int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc); +int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value); int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra); int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize); diff --git a/include/ext_cuda.h b/include/ext_cuda.h index eb8967f09..49257acbb 100644 --- a/include/ext_cuda.h +++ b/include/ext_cuda.h @@ -1024,6 +1024,7 @@ typedef struct hc_cuda_lib CUDA_CUCTXGETSHAREDMEMCONFIG cuCtxGetSharedMemConfig; CUDA_CUCTXPOPCURRENT cuCtxPopCurrent; CUDA_CUCTXPUSHCURRENT cuCtxPushCurrent; + CUDA_CUCTXSETCACHECONFIG cuCtxSetCacheConfig; CUDA_CUCTXSETCURRENT cuCtxSetCurrent; CUDA_CUCTXSETSHAREDMEMCONFIG cuCtxSetSharedMemConfig; CUDA_CUCTXSYNCHRONIZE cuCtxSynchronize; diff --git a/src/backend.c b/src/backend.c index ed88b191f..97c9d6b6e 100644 --- a/src/backend.c +++ b/src/backend.c @@ -886,6 +886,7 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC (cuda, cuCtxGetSharedMemConfig, CUDA_CUCTXGETSHAREDMEMCONFIG, CUDA, 1); HC_LOAD_FUNC (cuda, cuCtxPopCurrent, CUDA_CUCTXPOPCURRENT, CUDA, 1); HC_LOAD_FUNC (cuda, cuCtxPushCurrent, CUDA_CUCTXPUSHCURRENT, CUDA, 1); + HC_LOAD_FUNC (cuda, cuCtxSetCacheConfig, CUDA_CUCTXSETCACHECONFIG, CUDA, 1); HC_LOAD_FUNC (cuda, cuCtxSetCurrent, CUDA_CUCTXSETCURRENT, CUDA, 1); HC_LOAD_FUNC (cuda, cuCtxSetSharedMemConfig, CUDA_CUCTXSETSHAREDMEMCONFIG, CUDA, 1); HC_LOAD_FUNC (cuda, cuCtxSynchronize, CUDA_CUCTXSYNCHRONIZE, CUDA, 1); @@ -1467,6 +1468,33 @@ int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attri return 0; } +int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + CUDA_PTR *cuda = backend_ctx->cuda; + + const CUresult CU_err = cuda->cuFuncSetAttribute (hfunc, attrib, value); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %d", CU_err); + } + + return -1; + } + + return 0; +} + int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1764,6 +1792,35 @@ int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) return 0; } +int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + CUDA_PTR *cuda = backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxSetCacheConfig (config); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %d", CU_err); + } + + return -1; + } + + return 0; +} + + + // OpenCL int ocl_init (hashcat_ctx_t *hashcat_ctx) @@ -5398,6 +5455,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (rc_cuCtxSetCurrent == -1) return -1; + // bcrypt optimization? + //const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED); + // + //if (rc_cuCtxSetCacheConfig == -1) return -1; + const bool has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); device_param->has_bfe = has_bfe;