@ -1255,8 +1255,11 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx)
HC_LOAD_FUNC_CUDA ( cuda , cuMemAlloc , cuMemAlloc_v2 , CUDA_CUMEMALLOC , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemAllocHost , cuMemAllocHost_v2 , CUDA_CUMEMALLOCHOST , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyDtoD , cuMemcpyDtoD_v2 , CUDA_CUMEMCPYDTOD , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyDtoDAsync , cuMemcpyDtoDAsync_v2 , CUDA_CUMEMCPYDTODASYNC , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyDtoH , cuMemcpyDtoH_v2 , CUDA_CUMEMCPYDTOH , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyDtoHAsync , cuMemcpyDtoHAsync_v2 , CUDA_CUMEMCPYDTOHASYNC , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyHtoD , cuMemcpyHtoD_v2 , CUDA_CUMEMCPYHTOD , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemcpyHtoDAsync , cuMemcpyHtoDAsync_v2 , CUDA_CUMEMCPYHTODASYNC , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemFree , cuMemFree_v2 , CUDA_CUMEMFREE , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemFreeHost , cuMemFreeHost , CUDA_CUMEMFREEHOST , CUDA , 1 ) ;
HC_LOAD_FUNC_CUDA ( cuda , cuMemGetInfo , cuMemGetInfo_v2 , CUDA_CUMEMGETINFO , CUDA , 1 ) ;
@ -1708,6 +1711,33 @@ int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcD
return 0 ;
}
int hc_cuMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream)
{
  // Checked wrapper around the dynamically loaded driver-API entry
  // cuMemcpyDtoHAsync: forward the call, and on failure log a readable
  // error (string form when cuGetErrorString succeeds, numeric otherwise)
  // and report -1 to the caller.
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;

  const CUresult rc = cuda->cuMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream);

  if (rc == CUDA_SUCCESS) return 0;

  const char *pStr = NULL;

  if (cuda->cuGetErrorString (rc, &pStr) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %s", pStr);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %d", rc);
  }

  return -1;
}
int hc_cuMemcpyDtoD ( hashcat_ctx_t * hashcat_ctx , CUdeviceptr dstDevice , CUdeviceptr srcDevice , size_t ByteCount )
{
backend_ctx_t * backend_ctx = hashcat_ctx - > backend_ctx ;
@ -1735,6 +1765,33 @@ int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdevice
return 0 ;
}
int hc_cuMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream)
{
  // Checked wrapper around the dynamically loaded driver-API entry
  // cuMemcpyDtoDAsync: forward the call, and on failure log a readable
  // error (string form when cuGetErrorString succeeds, numeric otherwise)
  // and report -1 to the caller.
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;

  const CUresult rc = cuda->cuMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream);

  if (rc == CUDA_SUCCESS) return 0;

  const char *pStr = NULL;

  if (cuda->cuGetErrorString (rc, &pStr) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %s", pStr);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %d", rc);
  }

  return -1;
}
int hc_cuMemcpyHtoD ( hashcat_ctx_t * hashcat_ctx , CUdeviceptr dstDevice , const void * srcHost , size_t ByteCount )
{
backend_ctx_t * backend_ctx = hashcat_ctx - > backend_ctx ;
@ -1762,6 +1819,33 @@ int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const vo
return 0 ;
}
int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream)
{
  // Checked wrapper around the dynamically loaded driver-API entry
  // cuMemcpyHtoDAsync: forward the call, and on failure log a readable
  // error (string form when cuGetErrorString succeeds, numeric otherwise)
  // and report -1 to the caller.
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;

  const CUresult rc = cuda->cuMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream);

  if (rc == CUDA_SUCCESS) return 0;

  const char *pStr = NULL;

  if (cuda->cuGetErrorString (rc, &pStr) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %s", pStr);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %d", rc);
  }

  return -1;
}
int hc_cuModuleGetFunction ( hashcat_ctx_t * hashcat_ctx , CUfunction * hfunc , CUmodule hmod , const char * name )
{
backend_ctx_t * backend_ctx = hashcat_ctx - > backend_ctx ;
@ -4878,7 +4962,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if ( device_param - > is_cuda = = true )
{
if ( run_cuda_kernel_bzero ( hashcat_ctx , device_param , device_param - > cuda_d_tmps , device_param - > size_tmps ) = = - 1 ) return - 1 ;
if ( run_cuda_kernel_bzero ( hashcat_ctx , device_param , device_param - > cuda_d_tmps , device_param - > size_tmps ) = = - 1 ) return - 1 ;
}
if ( device_param - > is_hip = = true )
@ -4896,7 +4980,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
{
if ( device_param - > is_cuda = = true )
{
if ( run_cuda_kernel_bzero ( hashcat_ctx , device_param , device_param - > cuda_d_hooks , pws_cnt * hashconfig - > hook_size ) = = - 1 ) return - 1 ;
if ( run_cuda_kernel_bzero ( hashcat_ctx , device_param , device_param - > cuda_d_hooks , pws_cnt * hashconfig - > hook_size ) = = - 1 ) return - 1 ;
}
if ( device_param - > is_hip = = true )
@ -5009,6 +5093,7 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
{
const u64 num16d = size / 16 ;
const u64 num16m = size % 16 ;
u32 tmp [ 4 ] ;
if ( num16d )
{
@ -5023,24 +5108,20 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
CUfunction function = device_param - > cuda_function_memset ;
if ( hc_cuLaunchKernel ( hashcat_ctx , function , num_elements , 1 , 1 , kernel_threads , 1 , 1 , 0 , device_param - > cuda_stream , device_param - > kernel_params_memset , NULL ) = = - 1 ) return - 1 ;
if ( hc_cuStreamSynchronize ( hashcat_ctx , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
}
if ( num16m )
{
u32 tmp [ 4 ] ;
tmp [ 0 ] = value ;
tmp [ 1 ] = value ;
tmp [ 2 ] = value ;
tmp [ 3 ] = value ;
// Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/
if ( hc_cuMemcpyHtoD ( hashcat_ctx , buf + ( num16d * 16 ) , tmp , num16m ) = = - 1 ) return - 1 ;
if ( hc_cuMemcpyHtoDAsync ( hashcat_ctx , buf + ( num16d * 16 ) , tmp , num16m , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
}
if ( hc_cuStreamSynchronize ( hashcat_ctx , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
return 0 ;
}
@ -5048,6 +5129,7 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
{
const u64 num16d = size / 16 ;
const u64 num16m = size % 16 ;
u32 tmp [ 4 ] ;
if ( num16d )
{
@ -5061,22 +5143,20 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
CUfunction function = device_param - > cuda_function_bzero ;
if ( hc_cuLaunchKernel ( hashcat_ctx , function , num_elements , 1 , 1 , kernel_threads , 1 , 1 , 0 , device_param - > cuda_stream , device_param - > kernel_params_bzero , NULL ) = = - 1 ) return - 1 ;
if ( hc_cuStreamSynchronize ( hashcat_ctx , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
}
if ( num16m )
{
u32 tmp [ 4 ] ;
tmp [ 0 ] = 0 ;
tmp [ 1 ] = 0 ;
tmp [ 2 ] = 0 ;
tmp [ 3 ] = 0 ;
if ( hc_cuMemcpyHtoD ( hashcat_ctx , buf + ( num16d * 16 ) , tmp , num16m ) = = - 1 ) return - 1 ;
if ( hc_cuMemcpyHtoDAsync ( hashcat_ctx , buf + ( num16d * 16 ) , tmp , num16m , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
}
if ( hc_cuStreamSynchronize ( hashcat_ctx , device_param - > cuda_stream ) = = - 1 ) return - 1 ;
return 0 ;
}