diff --git a/docs/changes.txt b/docs/changes.txt
index 94f7ef326..9c85f01e8 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -39,6 +39,7 @@
 - Unit tests: Updated test.sh to set default device-type to CPU with Apple Intel and added -f (--force) option
 - OpenCL Backend: moved functions to ext_OpenCL.c and includes to ext_OpenCL.h
 - HIP Backend: moved functions to ext_hip.c/ext_hiprtc.c and includes to ext_hip.h/ext_hiprtc.h
+- CUDA Backend: moved functions to ext_cuda.c/ext_nvrtc.c and includes to ext_cuda.h/ext_nvrtc.h

 * changes v6.2.4 -> v6.2.5

diff --git a/include/backend.h b/include/backend.h
index 0f1e23133..c7e7de0a1 100644
--- a/include/backend.h
+++ b/include/backend.h
@@ -23,65 +23,34 @@ static const char CL_VENDOR_MESA[] = "Mesa";
 static const char CL_VENDOR_NV[] = "NVIDIA Corporation";
 static const char CL_VENDOR_POCL[] = "The pocl project";

-int cuda_init (hashcat_ctx_t *hashcat_ctx);
-void cuda_close (hashcat_ctx_t *hashcat_ctx);
+int backend_ctx_init (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx);
+
+int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime);
+void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx);
+void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx);
+
+int backend_session_begin (hashcat_ctx_t *hashcat_ctx);
+void backend_session_destroy (hashcat_ctx_t *hashcat_ctx);
+void backend_session_reset (hashcat_ctx_t *hashcat_ctx);
+int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
+int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx);
+int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);

-int nvrtc_init (hashcat_ctx_t *hashcat_ctx);
-void nvrtc_close (hashcat_ctx_t *hashcat_ctx);
-
-int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
-int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
-int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
-int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
-int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
-int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
-int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
-int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor);
-
-int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
-int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
-int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
-int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config);
-int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx);
-int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
-int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count);
-int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
-int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
-int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
-int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
-int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags);
-int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
-int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd);
-int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
-int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream);
-int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
-int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
-int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value);
-int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
-int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
-int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
-int hc_cuMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
-int hc_cuMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
-int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
-int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
-int hc_cuMemsetD32Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
-int hc_cuMemsetD8Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
-int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
-int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
-int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
-int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags);
-int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
-int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
-int hc_cuCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
-int hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx);
-int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
-int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues);
-int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
-int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
+void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file);
+void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_source_kernel_shared_filename (char *shared_dir, char *source_file);
+void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file);
+void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
+void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
+void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file);

-int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
+int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);

-int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
+int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);

 int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
 int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
@@ -101,38 +70,13 @@ int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param
 int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
 int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);

-int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
-int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
-int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
-int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
-int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
-int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);
-
-void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
-void generate_source_kernel_shared_filename (char *shared_dir, char *source_file);
-void generate_cached_kernel_shared_filename (char *cache_dir, const char *device_name_chksum, char *cached_file);
-void generate_source_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *shared_dir, char *source_file);
-void generate_cached_kernel_mp_filename (const u32 opti_type, const u64 opts_type, char *cache_dir, const char *device_name_chksum, char *cached_file);
-void generate_source_kernel_amp_filename (const u32 attack_kern, char *shared_dir, char *source_file);
-void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir, const char *device_name_chksum, char *cached_file);
-
-int backend_ctx_init (hashcat_ctx_t *hashcat_ctx);
-void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx);
-
-int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime);
-void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx);
-void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx);
-void backend_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx);
-void backend_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx);
-
-int backend_session_begin (hashcat_ctx_t *hashcat_ctx);
-void backend_session_destroy (hashcat_ctx_t *hashcat_ctx);
-void backend_session_reset (hashcat_ctx_t *hashcat_ctx);
-int backend_session_update_combinator (hashcat_ctx_t *hashcat_ctx);
-int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx);
-int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_l, const u32 css_cnt_r);
+int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
+int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
+int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
+int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
+int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_cnt);
+int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 pws_pos, const u64 pws_cnt);

 void *hook12_thread (void *p);
 void *hook23_thread (void *p);
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
index 11ed86048..a2d1736f0 100644
--- a/include/ext_cuda.h
+++ b/include/ext_cuda.h
@@ -34,502 +34,504 @@ typedef struct CUmod_st *CUmodule; /**< CUDA module */
 typedef struct CUstream_st *CUstream; /**< CUDA stream */
 typedef struct CUlinkState_st *CUlinkState;

-typedef enum cudaError_enum {
- /**
- * The API call returned with no errors. In the case of query calls, this
- * also means that the operation being queried is complete (see
- * ::cuEventQuery() and ::cuStreamQuery()).
- */
- CUDA_SUCCESS = 0,
-
- /**
- * This indicates that one or more of the parameters passed to the API call
- * is not within an acceptable range of values.
- */
- CUDA_ERROR_INVALID_VALUE = 1,
-
- /**
- * The API call failed because it was unable to allocate enough memory to
- * perform the requested operation.
- */
- CUDA_ERROR_OUT_OF_MEMORY = 2,
-
- /**
- * This indicates that the CUDA driver has not been initialized with
- * ::cuInit() or that initialization has failed.
- */
- CUDA_ERROR_NOT_INITIALIZED = 3,
-
- /**
- * This indicates that the CUDA driver is in the process of shutting down.
- */
- CUDA_ERROR_DEINITIALIZED = 4,
-
- /**
- * This indicates profiler is not initialized for this run. This can
- * happen when the application is running with external profiling tools
- * like visual profiler.
- */ - CUDA_ERROR_PROFILER_DISABLED = 5, - - /** - * \deprecated - * This error return is deprecated as of CUDA 5.0. It is no longer an error - * to attempt to enable/disable the profiling via ::cuProfilerStart or - * ::cuProfilerStop without initialization. - */ - CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, - - /** - * \deprecated - * This error return is deprecated as of CUDA 5.0. It is no longer an error - * to call cuProfilerStart() when profiling is already enabled. - */ - CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, - - /** - * \deprecated - * This error return is deprecated as of CUDA 5.0. It is no longer an error - * to call cuProfilerStop() when profiling is already disabled. - */ - CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, - - /** - * This indicates that no CUDA-capable devices were detected by the installed - * CUDA driver. - */ - CUDA_ERROR_NO_DEVICE = 100, - - /** - * This indicates that the device ordinal supplied by the user does not - * correspond to a valid CUDA device. - */ - CUDA_ERROR_INVALID_DEVICE = 101, - - - /** - * This indicates that the device kernel image is invalid. This can also - * indicate an invalid CUDA module. - */ - CUDA_ERROR_INVALID_IMAGE = 200, - - /** - * This most frequently indicates that there is no context bound to the - * current thread. This can also be returned if the context passed to an - * API call is not a valid handle (such as a context that has had - * ::cuCtxDestroy() invoked on it). This can also be returned if a user - * mixes different API versions (i.e. 3010 context with 3020 API calls). - * See ::cuCtxGetApiVersion() for more details. - */ - CUDA_ERROR_INVALID_CONTEXT = 201, - - /** - * This indicated that the context being supplied as a parameter to the - * API call was already the active context. - * \deprecated - * This error return is deprecated as of CUDA 3.2. It is no longer an - * error to attempt to push the active context via ::cuCtxPushCurrent(). - */ - CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, - - /** - * This indicates that a map or register operation has failed. - */ - CUDA_ERROR_MAP_FAILED = 205, - - /** - * This indicates that an unmap or unregister operation has failed. - */ - CUDA_ERROR_UNMAP_FAILED = 206, - - /** - * This indicates that the specified array is currently mapped and thus - * cannot be destroyed. - */ - CUDA_ERROR_ARRAY_IS_MAPPED = 207, - - /** - * This indicates that the resource is already mapped. - */ - CUDA_ERROR_ALREADY_MAPPED = 208, - - /** - * This indicates that there is no kernel image available that is suitable - * for the device. This can occur when a user specifies code generation - * options for a particular CUDA source file that do not include the - * corresponding device configuration. - */ - CUDA_ERROR_NO_BINARY_FOR_GPU = 209, - - /** - * This indicates that a resource has already been acquired. - */ - CUDA_ERROR_ALREADY_ACQUIRED = 210, - - /** - * This indicates that a resource is not mapped. - */ - CUDA_ERROR_NOT_MAPPED = 211, - - /** - * This indicates that a mapped resource is not available for access as an - * array. - */ - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, - - /** - * This indicates that a mapped resource is not available for access as a - * pointer. - */ - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, - - /** - * This indicates that an uncorrectable ECC error was detected during - * execution. - */ - CUDA_ERROR_ECC_UNCORRECTABLE = 214, - - /** - * This indicates that the ::CUlimit passed to the API call is not - * supported by the active device. 
- */ - CUDA_ERROR_UNSUPPORTED_LIMIT = 215, - - /** - * This indicates that the ::CUcontext passed to the API call can - * only be bound to a single CPU thread at a time but is already - * bound to a CPU thread. - */ - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, - - /** - * This indicates that peer access is not supported across the given - * devices. - */ - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, - - /** - * This indicates that a PTX JIT compilation failed. - */ - CUDA_ERROR_INVALID_PTX = 218, - - /** - * This indicates an error with OpenGL or DirectX context. - */ - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, - - /** - * This indicates that an uncorrectable NVLink error was detected during the - * execution. - */ - CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, - - /** - * This indicates that the PTX JIT compiler library was not found. - */ - CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, - - /** - * This indicates that the device kernel source is invalid. - */ - CUDA_ERROR_INVALID_SOURCE = 300, - - /** - * This indicates that the file specified was not found. - */ - CUDA_ERROR_FILE_NOT_FOUND = 301, - - /** - * This indicates that a link to a shared object failed to resolve. - */ - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, - - /** - * This indicates that initialization of a shared object failed. - */ - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, - - /** - * This indicates that an OS call failed. - */ - CUDA_ERROR_OPERATING_SYSTEM = 304, - - /** - * This indicates that a resource handle passed to the API call was not - * valid. Resource handles are opaque types like ::CUstream and ::CUevent. - */ - CUDA_ERROR_INVALID_HANDLE = 400, - - /** - * This indicates that a resource required by the API call is not in a - * valid state to perform the requested operation. - */ - CUDA_ERROR_ILLEGAL_STATE = 401, - - /** - * This indicates that a named symbol was not found. Examples of symbols - * are global/constant variable names, texture names, and surface names. - */ - CUDA_ERROR_NOT_FOUND = 500, - - /** - * This indicates that asynchronous operations issued previously have not - * completed yet. This result is not actually an error, but must be indicated - * differently than ::CUDA_SUCCESS (which indicates completion). Calls that - * may return this value include ::cuEventQuery() and ::cuStreamQuery(). - */ - CUDA_ERROR_NOT_READY = 600, - - /** - * While executing a kernel, the device encountered a - * load or store instruction on an invalid memory address. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_ILLEGAL_ADDRESS = 700, - - /** - * This indicates that a launch did not occur because it did not have - * appropriate resources. This error usually indicates that the user has - * attempted to pass too many arguments to the device kernel, or the - * kernel launch specifies too many threads for the kernel's register - * count. Passing arguments of the wrong size (i.e. a 64-bit pointer - * when a 32-bit int is expected) is equivalent to passing too many - * arguments and can also result in this error. - */ - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, - - /** - * This indicates that the device kernel took too long to execute. This can - * only occur if timeouts are enabled - see the device attribute - * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. 
- * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_LAUNCH_TIMEOUT = 702, - - /** - * This error indicates a kernel launch that uses an incompatible texturing - * mode. - */ - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, - - /** - * This error indicates that a call to ::cuCtxEnablePeerAccess() is - * trying to re-enable peer access to a context which has already - * had peer access to it enabled. - */ - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, - - /** - * This error indicates that ::cuCtxDisablePeerAccess() is - * trying to disable peer access which has not been enabled yet - * via ::cuCtxEnablePeerAccess(). - */ - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, - - /** - * This error indicates that the primary context for the specified device - * has already been initialized. - */ - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, - - /** - * This error indicates that the context current to the calling thread - * has been destroyed using ::cuCtxDestroy, or is a primary context which - * has not yet been initialized. - */ - CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, - - /** - * A device-side assert triggered during kernel execution. The context - * cannot be used anymore, and must be destroyed. All existing device - * memory allocations from this context are invalid and must be - * reconstructed if the program is to continue using CUDA. - */ - CUDA_ERROR_ASSERT = 710, - - /** - * This error indicates that the hardware resources required to enable - * peer access have been exhausted for one or more of the devices - * passed to ::cuCtxEnablePeerAccess(). - */ - CUDA_ERROR_TOO_MANY_PEERS = 711, - - /** - * This error indicates that the memory range passed to ::cuMemHostRegister() - * has already been registered. - */ - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, - - /** - * This error indicates that the pointer passed to ::cuMemHostUnregister() - * does not correspond to any currently registered memory region. - */ - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, - - /** - * While executing a kernel, the device encountered a stack error. - * This can be due to stack corruption or exceeding the stack size limit. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_HARDWARE_STACK_ERROR = 714, - - /** - * While executing a kernel, the device encountered an illegal instruction. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, - - /** - * While executing a kernel, the device encountered a load or store instruction - * on a memory address which is not aligned. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_MISALIGNED_ADDRESS = 716, - - /** - * While executing a kernel, the device encountered an instruction - * which can only operate on memory locations in certain address spaces - * (global, shared, or local), but was supplied a memory address not - * belonging to an allowed address space. 
- * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, - - /** - * While executing a kernel, the device program counter wrapped its address space. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_INVALID_PC = 718, - - /** - * An exception occurred on the device while executing a kernel. Common - * causes include dereferencing an invalid device pointer and accessing - * out of bounds shared memory. Less common cases can be system specific - more - * information about these cases can be found in the system specific user guide. - * This leaves the process in an inconsistent state and any further CUDA work - * will return the same error. To continue using CUDA, the process must be terminated - * and relaunched. - */ - CUDA_ERROR_LAUNCH_FAILED = 719, - - /** - * This error indicates that the number of blocks launched per grid for a kernel that was - * launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice - * exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor - * or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors - * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. - */ - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, - - /** - * This error indicates that the attempted operation is not permitted. - */ - CUDA_ERROR_NOT_PERMITTED = 800, - - /** - * This error indicates that the attempted operation is not supported - * on the current system or device. - */ - CUDA_ERROR_NOT_SUPPORTED = 801, - - /** - * This error indicates that the system is not yet ready to start any CUDA - * work. To continue using CUDA, verify the system configuration is in a - * valid state and all required driver daemons are actively running. - * More information about this error can be found in the system specific - * user guide. - */ - CUDA_ERROR_SYSTEM_NOT_READY = 802, - - /** - * This error indicates that there is a mismatch between the versions of - * the display driver and the CUDA driver. Refer to the compatibility documentation - * for supported versions. - */ - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803, - - /** - * This error indicates that the system was upgraded to run with forward compatibility - * but the visible hardware detected by CUDA does not support this configuration. - * Refer to the compatibility documentation for the supported hardware matrix or ensure - * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES - * environment variable. - */ - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, - - /** - * This error indicates that the operation is not permitted when - * the stream is capturing. - */ - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, - - /** - * This error indicates that the current capture sequence on the stream - * has been invalidated due to a previous error. - */ - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901, - - /** - * This error indicates that the operation would have resulted in a merge - * of two independent capture sequences. - */ - CUDA_ERROR_STREAM_CAPTURE_MERGE = 902, - - /** - * This error indicates that the capture was not initiated in this stream. 
- */ - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903, - - /** - * This error indicates that the capture sequence contains a fork that was - * not joined to the primary stream. - */ - CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904, - - /** - * This error indicates that a dependency would have been created which - * crosses the capture sequence boundary. Only implicit in-stream ordering - * dependencies are allowed to cross the boundary. - */ - CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905, - - /** - * This error indicates a disallowed implicit dependency on a current capture - * sequence from cudaStreamLegacy. - */ - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906, - - /** - * This error indicates that the operation is not permitted on an event which - * was last recorded in a capturing stream. - */ - CUDA_ERROR_CAPTURED_EVENT = 907, - - /** - * A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED - * argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a - * different thread. - */ - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, - - /** - * This indicates that an unknown internal error has occurred. - */ - CUDA_ERROR_UNKNOWN = 999 +typedef enum cudaError_enum +{ + /** + * The API call returned with no errors. In the case of query calls, this + * also means that the operation being queried is complete (see + * ::cuEventQuery() and ::cuStreamQuery()). + */ + CUDA_SUCCESS = 0, + + /** + * This indicates that one or more of the parameters passed to the API call + * is not within an acceptable range of values. + */ + CUDA_ERROR_INVALID_VALUE = 1, + + /** + * The API call failed because it was unable to allocate enough memory to + * perform the requested operation. + */ + CUDA_ERROR_OUT_OF_MEMORY = 2, + + /** + * This indicates that the CUDA driver has not been initialized with + * ::cuInit() or that initialization has failed. + */ + CUDA_ERROR_NOT_INITIALIZED = 3, + + /** + * This indicates that the CUDA driver is in the process of shutting down. + */ + CUDA_ERROR_DEINITIALIZED = 4, + + /** + * This indicates profiler is not initialized for this run. This can + * happen when the application is running with external profiling tools + * like visual profiler. + */ + CUDA_ERROR_PROFILER_DISABLED = 5, + + /** + * \deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error + * to attempt to enable/disable the profiling via ::cuProfilerStart or + * ::cuProfilerStop without initialization. + */ + CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, + + /** + * \deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error + * to call cuProfilerStart() when profiling is already enabled. + */ + CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, + + /** + * \deprecated + * This error return is deprecated as of CUDA 5.0. It is no longer an error + * to call cuProfilerStop() when profiling is already disabled. + */ + CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, + + /** + * This indicates that no CUDA-capable devices were detected by the installed + * CUDA driver. + */ + CUDA_ERROR_NO_DEVICE = 100, + + /** + * This indicates that the device ordinal supplied by the user does not + * correspond to a valid CUDA device. + */ + CUDA_ERROR_INVALID_DEVICE = 101, + + + /** + * This indicates that the device kernel image is invalid. This can also + * indicate an invalid CUDA module. + */ + CUDA_ERROR_INVALID_IMAGE = 200, + + /** + * This most frequently indicates that there is no context bound to the + * current thread. 
This can also be returned if the context passed to an + * API call is not a valid handle (such as a context that has had + * ::cuCtxDestroy() invoked on it). This can also be returned if a user + * mixes different API versions (i.e. 3010 context with 3020 API calls). + * See ::cuCtxGetApiVersion() for more details. + */ + CUDA_ERROR_INVALID_CONTEXT = 201, + + /** + * This indicated that the context being supplied as a parameter to the + * API call was already the active context. + * \deprecated + * This error return is deprecated as of CUDA 3.2. It is no longer an + * error to attempt to push the active context via ::cuCtxPushCurrent(). + */ + CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, + + /** + * This indicates that a map or register operation has failed. + */ + CUDA_ERROR_MAP_FAILED = 205, + + /** + * This indicates that an unmap or unregister operation has failed. + */ + CUDA_ERROR_UNMAP_FAILED = 206, + + /** + * This indicates that the specified array is currently mapped and thus + * cannot be destroyed. + */ + CUDA_ERROR_ARRAY_IS_MAPPED = 207, + + /** + * This indicates that the resource is already mapped. + */ + CUDA_ERROR_ALREADY_MAPPED = 208, + + /** + * This indicates that there is no kernel image available that is suitable + * for the device. This can occur when a user specifies code generation + * options for a particular CUDA source file that do not include the + * corresponding device configuration. + */ + CUDA_ERROR_NO_BINARY_FOR_GPU = 209, + + /** + * This indicates that a resource has already been acquired. + */ + CUDA_ERROR_ALREADY_ACQUIRED = 210, + + /** + * This indicates that a resource is not mapped. + */ + CUDA_ERROR_NOT_MAPPED = 211, + + /** + * This indicates that a mapped resource is not available for access as an + * array. + */ + CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, + + /** + * This indicates that a mapped resource is not available for access as a + * pointer. + */ + CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, + + /** + * This indicates that an uncorrectable ECC error was detected during + * execution. + */ + CUDA_ERROR_ECC_UNCORRECTABLE = 214, + + /** + * This indicates that the ::CUlimit passed to the API call is not + * supported by the active device. + */ + CUDA_ERROR_UNSUPPORTED_LIMIT = 215, + + /** + * This indicates that the ::CUcontext passed to the API call can + * only be bound to a single CPU thread at a time but is already + * bound to a CPU thread. + */ + CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, + + /** + * This indicates that peer access is not supported across the given + * devices. + */ + CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, + + /** + * This indicates that a PTX JIT compilation failed. + */ + CUDA_ERROR_INVALID_PTX = 218, + + /** + * This indicates an error with OpenGL or DirectX context. + */ + CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, + + /** + * This indicates that an uncorrectable NVLink error was detected during the + * execution. + */ + CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, + + /** + * This indicates that the PTX JIT compiler library was not found. + */ + CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, + + /** + * This indicates that the device kernel source is invalid. + */ + CUDA_ERROR_INVALID_SOURCE = 300, + + /** + * This indicates that the file specified was not found. + */ + CUDA_ERROR_FILE_NOT_FOUND = 301, + + /** + * This indicates that a link to a shared object failed to resolve. + */ + CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, + + /** + * This indicates that initialization of a shared object failed. 
+ */ + CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, + + /** + * This indicates that an OS call failed. + */ + CUDA_ERROR_OPERATING_SYSTEM = 304, + + /** + * This indicates that a resource handle passed to the API call was not + * valid. Resource handles are opaque types like ::CUstream and ::CUevent. + */ + CUDA_ERROR_INVALID_HANDLE = 400, + + /** + * This indicates that a resource required by the API call is not in a + * valid state to perform the requested operation. + */ + CUDA_ERROR_ILLEGAL_STATE = 401, + + /** + * This indicates that a named symbol was not found. Examples of symbols + * are global/constant variable names, texture names, and surface names. + */ + CUDA_ERROR_NOT_FOUND = 500, + + /** + * This indicates that asynchronous operations issued previously have not + * completed yet. This result is not actually an error, but must be indicated + * differently than ::CUDA_SUCCESS (which indicates completion). Calls that + * may return this value include ::cuEventQuery() and ::cuStreamQuery(). + */ + CUDA_ERROR_NOT_READY = 600, + + /** + * While executing a kernel, the device encountered a + * load or store instruction on an invalid memory address. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_ILLEGAL_ADDRESS = 700, + + /** + * This indicates that a launch did not occur because it did not have + * appropriate resources. This error usually indicates that the user has + * attempted to pass too many arguments to the device kernel, or the + * kernel launch specifies too many threads for the kernel's register + * count. Passing arguments of the wrong size (i.e. a 64-bit pointer + * when a 32-bit int is expected) is equivalent to passing too many + * arguments and can also result in this error. + */ + CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, + + /** + * This indicates that the device kernel took too long to execute. This can + * only occur if timeouts are enabled - see the device attribute + * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_LAUNCH_TIMEOUT = 702, + + /** + * This error indicates a kernel launch that uses an incompatible texturing + * mode. + */ + CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, + + /** + * This error indicates that a call to ::cuCtxEnablePeerAccess() is + * trying to re-enable peer access to a context which has already + * had peer access to it enabled. + */ + CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, + + /** + * This error indicates that ::cuCtxDisablePeerAccess() is + * trying to disable peer access which has not been enabled yet + * via ::cuCtxEnablePeerAccess(). + */ + CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, + + /** + * This error indicates that the primary context for the specified device + * has already been initialized. + */ + CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, + + /** + * This error indicates that the context current to the calling thread + * has been destroyed using ::cuCtxDestroy, or is a primary context which + * has not yet been initialized. + */ + CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, + + /** + * A device-side assert triggered during kernel execution. The context + * cannot be used anymore, and must be destroyed. 
All existing device + * memory allocations from this context are invalid and must be + * reconstructed if the program is to continue using CUDA. + */ + CUDA_ERROR_ASSERT = 710, + + /** + * This error indicates that the hardware resources required to enable + * peer access have been exhausted for one or more of the devices + * passed to ::cuCtxEnablePeerAccess(). + */ + CUDA_ERROR_TOO_MANY_PEERS = 711, + + /** + * This error indicates that the memory range passed to ::cuMemHostRegister() + * has already been registered. + */ + CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, + + /** + * This error indicates that the pointer passed to ::cuMemHostUnregister() + * does not correspond to any currently registered memory region. + */ + CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, + + /** + * While executing a kernel, the device encountered a stack error. + * This can be due to stack corruption or exceeding the stack size limit. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_HARDWARE_STACK_ERROR = 714, + + /** + * While executing a kernel, the device encountered an illegal instruction. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, + + /** + * While executing a kernel, the device encountered a load or store instruction + * on a memory address which is not aligned. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_MISALIGNED_ADDRESS = 716, + + /** + * While executing a kernel, the device encountered an instruction + * which can only operate on memory locations in certain address spaces + * (global, shared, or local), but was supplied a memory address not + * belonging to an allowed address space. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, + + /** + * While executing a kernel, the device program counter wrapped its address space. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. + */ + CUDA_ERROR_INVALID_PC = 718, + + /** + * An exception occurred on the device while executing a kernel. Common + * causes include dereferencing an invalid device pointer and accessing + * out of bounds shared memory. Less common cases can be system specific - more + * information about these cases can be found in the system specific user guide. + * This leaves the process in an inconsistent state and any further CUDA work + * will return the same error. To continue using CUDA, the process must be terminated + * and relaunched. 
+ */ + CUDA_ERROR_LAUNCH_FAILED = 719, + + /** + * This error indicates that the number of blocks launched per grid for a kernel that was + * launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice + * exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor + * or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors + * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. + */ + CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, + + /** + * This error indicates that the attempted operation is not permitted. + */ + CUDA_ERROR_NOT_PERMITTED = 800, + + /** + * This error indicates that the attempted operation is not supported + * on the current system or device. + */ + CUDA_ERROR_NOT_SUPPORTED = 801, + + /** + * This error indicates that the system is not yet ready to start any CUDA + * work. To continue using CUDA, verify the system configuration is in a + * valid state and all required driver daemons are actively running. + * More information about this error can be found in the system specific + * user guide. + */ + CUDA_ERROR_SYSTEM_NOT_READY = 802, + + /** + * This error indicates that there is a mismatch between the versions of + * the display driver and the CUDA driver. Refer to the compatibility documentation + * for supported versions. + */ + CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803, + + /** + * This error indicates that the system was upgraded to run with forward compatibility + * but the visible hardware detected by CUDA does not support this configuration. + * Refer to the compatibility documentation for the supported hardware matrix or ensure + * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES + * environment variable. + */ + CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, + + /** + * This error indicates that the operation is not permitted when + * the stream is capturing. + */ + CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, + + /** + * This error indicates that the current capture sequence on the stream + * has been invalidated due to a previous error. + */ + CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901, + + /** + * This error indicates that the operation would have resulted in a merge + * of two independent capture sequences. + */ + CUDA_ERROR_STREAM_CAPTURE_MERGE = 902, + + /** + * This error indicates that the capture was not initiated in this stream. + */ + CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903, + + /** + * This error indicates that the capture sequence contains a fork that was + * not joined to the primary stream. + */ + CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904, + + /** + * This error indicates that a dependency would have been created which + * crosses the capture sequence boundary. Only implicit in-stream ordering + * dependencies are allowed to cross the boundary. + */ + CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905, + + /** + * This error indicates a disallowed implicit dependency on a current capture + * sequence from cudaStreamLegacy. + */ + CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906, + + /** + * This error indicates that the operation is not permitted on an event which + * was last recorded in a capturing stream. + */ + CUDA_ERROR_CAPTURED_EVENT = 907, + + /** + * A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED + * argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a + * different thread. 
+ */ + CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, + + /** + * This indicates that an unknown internal error has occurred. + */ + CUDA_ERROR_UNKNOWN = 999 + } CUresult; /** @@ -537,454 +539,469 @@ typedef enum cudaError_enum { */ typedef enum CUjit_option_enum { - /** - * Max number of registers that a thread may use.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - CU_JIT_MAX_REGISTERS = 0, - - /** - * IN: Specifies minimum number of threads per block to target compilation - * for\n - * OUT: Returns the number of threads the compiler actually targeted. - * This restricts the resource utilization fo the compiler (e.g. max - * registers) such that a block with the given number of threads should be - * able to launch based on register limitations. Note, this option does not - * currently take into account any other resource limitations, such as - * shared memory utilization.\n - * Cannot be combined with ::CU_JIT_TARGET.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - CU_JIT_THREADS_PER_BLOCK, - - /** - * Overwrites the option value with the total wall clock time, in - * milliseconds, spent in the compiler and linker\n - * Option type: float\n - * Applies to: compiler and linker - */ - CU_JIT_WALL_TIME, - - /** - * Pointer to a buffer in which to print any log messages - * that are informational in nature (the buffer size is specified via - * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n - * Option type: char *\n - * Applies to: compiler and linker - */ - CU_JIT_INFO_LOG_BUFFER, - - /** - * IN: Log buffer size in bytes. Log messages will be capped at this size - * (including null terminator)\n - * OUT: Amount of log buffer filled with messages\n - * Option type: unsigned int\n - * Applies to: compiler and linker - */ - CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, - - /** - * Pointer to a buffer in which to print any log messages that - * reflect errors (the buffer size is specified via option - * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n - * Option type: char *\n - * Applies to: compiler and linker - */ - CU_JIT_ERROR_LOG_BUFFER, - - /** - * IN: Log buffer size in bytes. Log messages will be capped at this size - * (including null terminator)\n - * OUT: Amount of log buffer filled with messages\n - * Option type: unsigned int\n - * Applies to: compiler and linker - */ - CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, - - /** - * Level of optimizations to apply to generated code (0 - 4), with 4 - * being the default and highest level of optimizations.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - CU_JIT_OPTIMIZATION_LEVEL, - - /** - * No option value required. Determines the target based on the current - * attached context (default)\n - * Option type: No option value needed\n - * Applies to: compiler and linker - */ - CU_JIT_TARGET_FROM_CUCONTEXT, - - /** - * Target is chosen based on supplied ::CUjit_target. Cannot be - * combined with ::CU_JIT_THREADS_PER_BLOCK.\n - * Option type: unsigned int for enumerated type ::CUjit_target\n - * Applies to: compiler and linker - */ - CU_JIT_TARGET, - - /** - * Specifies choice of fallback strategy if matching cubin is not found. - * Choice is based on supplied ::CUjit_fallback. 
This option cannot be
- * used with cuLink* APIs as the linker requires exact matches.\n
- * Option type: unsigned int for enumerated type ::CUjit_fallback\n
- * Applies to: compiler only
- */
- CU_JIT_FALLBACK_STRATEGY,
-
- /**
- * Specifies whether to create debug information in output (-g)
- * (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler and linker
- */
- CU_JIT_GENERATE_DEBUG_INFO,
-
- /**
- * Generate verbose log messages (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler and linker
- */
- CU_JIT_LOG_VERBOSE,
-
- /**
- * Generate line number information (-lineinfo) (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler only
- */
- CU_JIT_GENERATE_LINE_INFO,
-
- /**
- * Specifies whether to enable caching explicitly (-dlcm) \n
- * Choice is based on supplied ::CUjit_cacheMode_enum.\n
- * Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum\n
- * Applies to: compiler only
- */
- CU_JIT_CACHE_MODE,
-
- /**
- * The below jit options are used for internal purposes only, in this version of CUDA
- */
- CU_JIT_NEW_SM3X_OPT,
- CU_JIT_FAST_COMPILE,
-
- /**
- * Array of device symbol names that will be relocated to the corresponing
- * host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
- * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
- * When loding a device module, driver will relocate all encountered
- * unresolved symbols to the host addresses.\n
- * It is only allowed to register symbols that correspond to unresolved
- * global variables.\n
- * It is illegal to register the same device symbol at multiple addresses.\n
- * Option type: const char **\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_NAMES,
-
- /**
- * Array of host addresses that will be used to relocate corresponding
- * device symbols stored in ::CU_JIT_GLOBAL_SYMBOL_NAMES.\n
- * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
- * Option type: void **\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
-
- /**
- * Number of entries in ::CU_JIT_GLOBAL_SYMBOL_NAMES and
- * ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
- * Option type: unsigned int\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_COUNT,
-
- CU_JIT_NUM_OPTIONS
+ /**
+ * Max number of registers that a thread may use.\n
+ * Option type: unsigned int\n
+ * Applies to: compiler only
+ */
+ CU_JIT_MAX_REGISTERS = 0,
+
+ /**
+ * IN: Specifies minimum number of threads per block to target compilation
+ * for\n
+ * OUT: Returns the number of threads the compiler actually targeted.
+ * This restricts the resource utilization of the compiler (e.g. max
+ * registers) such that a block with the given number of threads should be
+ * able to launch based on register limitations.
Note, this option does not + * currently take into account any other resource limitations, such as + * shared memory utilization.\n + * Cannot be combined with ::CU_JIT_TARGET.\n + * Option type: unsigned int\n + * Applies to: compiler only + */ + CU_JIT_THREADS_PER_BLOCK, + + /** + * Overwrites the option value with the total wall clock time, in + * milliseconds, spent in the compiler and linker\n + * Option type: float\n + * Applies to: compiler and linker + */ + CU_JIT_WALL_TIME, + + /** + * Pointer to a buffer in which to print any log messages + * that are informational in nature (the buffer size is specified via + * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n + * Option type: char *\n + * Applies to: compiler and linker + */ + CU_JIT_INFO_LOG_BUFFER, + + /** + * IN: Log buffer size in bytes. Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int\n + * Applies to: compiler and linker + */ + CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + + /** + * Pointer to a buffer in which to print any log messages that + * reflect errors (the buffer size is specified via option + * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n + * Option type: char *\n + * Applies to: compiler and linker + */ + CU_JIT_ERROR_LOG_BUFFER, + + /** + * IN: Log buffer size in bytes. Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int\n + * Applies to: compiler and linker + */ + CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + + /** + * Level of optimizations to apply to generated code (0 - 4), with 4 + * being the default and highest level of optimizations.\n + * Option type: unsigned int\n + * Applies to: compiler only + */ + CU_JIT_OPTIMIZATION_LEVEL, + + /** + * No option value required. Determines the target based on the current + * attached context (default)\n + * Option type: No option value needed\n + * Applies to: compiler and linker + */ + CU_JIT_TARGET_FROM_CUCONTEXT, + + /** + * Target is chosen based on supplied ::CUjit_target. Cannot be + * combined with ::CU_JIT_THREADS_PER_BLOCK.\n + * Option type: unsigned int for enumerated type ::CUjit_target\n + * Applies to: compiler and linker + */ + CU_JIT_TARGET, + + /** + * Specifies choice of fallback strategy if matching cubin is not found. + * Choice is based on supplied ::CUjit_fallback. 
This option cannot be
+ * used with cuLink* APIs as the linker requires exact matches.\n
+ * Option type: unsigned int for enumerated type ::CUjit_fallback\n
+ * Applies to: compiler only
+ */
+ CU_JIT_FALLBACK_STRATEGY,
+
+ /**
+ * Specifies whether to create debug information in output (-g)
+ * (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler and linker
+ */
+ CU_JIT_GENERATE_DEBUG_INFO,
+
+ /**
+ * Generate verbose log messages (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler and linker
+ */
+ CU_JIT_LOG_VERBOSE,
+
+ /**
+ * Generate line number information (-lineinfo) (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler only
+ */
+ CU_JIT_GENERATE_LINE_INFO,
+
+ /**
+ * Specifies whether to enable caching explicitly (-dlcm) \n
+ * Choice is based on supplied ::CUjit_cacheMode_enum.\n
+ * Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum\n
+ * Applies to: compiler only
+ */
+ CU_JIT_CACHE_MODE,
+
+ /**
+ * The below jit options are used for internal purposes only, in this version of CUDA
+ */
+ CU_JIT_NEW_SM3X_OPT,
+ CU_JIT_FAST_COMPILE,
+
+ /**
+ * Array of device symbol names that will be relocated to the corresponding
+ * host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
+ * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+ * When loading a device module, driver will relocate all encountered
+ * unresolved symbols to the host addresses.\n
+ * It is only allowed to register symbols that correspond to unresolved
+ * global variables.\n
+ * It is illegal to register the same device symbol at multiple addresses.\n
+ * Option type: const char **\n
+ * Applies to: dynamic linker only
+ */
+ CU_JIT_GLOBAL_SYMBOL_NAMES,
+
+ /**
+ * Array of host addresses that will be used to relocate corresponding
+ * device symbols stored in ::CU_JIT_GLOBAL_SYMBOL_NAMES.\n
+ * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
+ * Option type: void **\n
+ * Applies to: dynamic linker only
+ */
+ CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
+
+ /**
+ * Number of entries in ::CU_JIT_GLOBAL_SYMBOL_NAMES and
+ * ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
+ * Option type: unsigned int\n
+ * Applies to: dynamic linker only
+ */
+ CU_JIT_GLOBAL_SYMBOL_COUNT,
+
+ CU_JIT_NUM_OPTIONS
 } CUjit_option;

 /**
 * Device properties
 */
-typedef enum CUdevice_attribute_enum {
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */
- CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
- CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
- CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */
- CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per
/**
 * Device properties
 */
-typedef enum CUdevice_attribute_enum {
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */
- CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
- CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
- CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */
- CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */
- CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Typical clock frequency in kilohertz */
- CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */
- CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */
- CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */
- CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */
- CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */
- CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, /**< Maximum 2D layered texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, /**< Maximum 2D layered texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, /**< Maximum layers in a 2D layered texture */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
- CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */
- CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */
- CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */
- CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */
- CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */
- CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, /**< Device is using TCC driver model */
- CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */
- CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */
- CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */
- CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */
- CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device shares a unified address space with the host */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, /**< Maximum layers in a 1D layered texture */
- CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, /**< Deprecated,
do not use. */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,/**< Alternate maximum 3D texture height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */ - CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, /**< PCI domain ID of the device */ - CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, /**< Pitch alignment requirement for textures */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, /**< Maximum cubemap texture width/height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, /**< Maximum cubemap layered texture width/height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, /**< Maximum 1D surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, /**< Maximum 2D surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, /**< Maximum 2D surface height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, /**< Maximum 3D surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, /**< Maximum 3D surface height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, /**< Maximum 3D surface depth */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, /**< Maximum 1D layered surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, /**< Maximum layers in a 1D layered surface */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, /**< Maximum 2D layered surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, /**< Maximum 2D layered surface height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, /**< Maximum layers in a 2D layered surface */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, /**< Maximum cubemap surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, /**< Maximum cubemap layered surface width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, /**< Maximum 1D linear texture width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, /**< Maximum 2D linear texture width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, /**< Maximum 2D linear texture height */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, /**< Maximum 2D linear texture pitch in bytes */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,/**< Maximum mipmapped 2D texture height */ - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, /**< Major compute capability version number */ - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, /**< Minor compute capability version number */ - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */ - CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, /**< Device supports stream priorities */ - CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, /**< 
Device supports caching globals in L1 */ - CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, /**< Device supports caching locals in L1 */ - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, /**< Maximum shared memory available per multiprocessor in bytes */ - CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, /**< Maximum number of 32-bit registers available per multiprocessor */ - CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, /**< Device can allocate managed memory on this system */ - CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, /**< Device is on a multi-GPU board */ - CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, /**< Unique id for a group of devices on the same multi-GPU board */ - CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/ - CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */ - CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */ - CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, /**< Device can coherently access managed memory concurrently with the CPU */ - CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, /**< Device supports compute preemption. */ - CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, /**< Device can access host registered memory at the same virtual address as the CPU */ - CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92, /**< ::cuStreamBatchMemOp and related APIs are supported. */ - CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93, /**< 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. */ - CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94, /**< ::CU_STREAM_WAIT_VALUE_NOR is supported. */ - CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95, /**< Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel */ - CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, /**< Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice */ - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, /**< Maximum optin shared memory per block */ - CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98, /**< Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. */ - CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99, /**< Device supports host memory registration via ::cudaHostRegister. */ - CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, /**< Device accesses pageable memory via the host's page tables. */ - CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, /**< The host can directly access managed memory on the device without migration. 
*/ - CU_DEVICE_ATTRIBUTE_MAX +typedef enum CUdevice_attribute_enum +{ + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */ + CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */ + CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */ + CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */ + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */ + CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Typical clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */ + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */ + CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */ + CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */ + CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */ + CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, /**< Maximum 2D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, /**< Maximum 2D layered texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, /**< Maximum layers in a 2D layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */ + CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */ 
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */ + CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */ + CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */ + CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */ + CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, /**< Device is using TCC driver model */ + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */ + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */ + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */ + CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */ + CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device shares a unified address space with the host */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, /**< Maximum layers in a 1D layered texture */ + CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, /**< Deprecated, do not use. */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,/**< Alternate maximum 3D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, /**< PCI domain ID of the device */ + CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, /**< Pitch alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, /**< Maximum cubemap texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, /**< Maximum cubemap layered texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, /**< Maximum 1D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, /**< Maximum 2D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, /**< Maximum 2D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, /**< Maximum 3D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, /**< Maximum 3D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, /**< Maximum 3D surface depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, /**< Maximum 1D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, /**< Maximum layers in a 1D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, /**< Maximum 2D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, /**< Maximum 2D layered surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, /**< Maximum layers in a 2D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, /**< Maximum cubemap surface width */ + 
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, /**< Maximum cubemap layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, /**< Maximum 1D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, /**< Maximum 2D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, /**< Maximum 2D linear texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, /**< Maximum 2D linear texture pitch in bytes */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,/**< Maximum mipmapped 2D texture height */ + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, /**< Major compute capability version number */ + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, /**< Minor compute capability version number */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */ + CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, /**< Device supports stream priorities */ + CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, /**< Device supports caching globals in L1 */ + CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, /**< Device supports caching locals in L1 */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, /**< Maximum shared memory available per multiprocessor in bytes */ + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, /**< Maximum number of 32-bit registers available per multiprocessor */ + CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, /**< Device can allocate managed memory on this system */ + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, /**< Device is on a multi-GPU board */ + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, /**< Unique id for a group of devices on the same multi-GPU board */ + CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/ + CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */ + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */ + CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, /**< Device can coherently access managed memory concurrently with the CPU */ + CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, /**< Device supports compute preemption. */ + CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, /**< Device can access host registered memory at the same virtual address as the CPU */ + CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92, /**< ::cuStreamBatchMemOp and related APIs are supported. */ + CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93, /**< 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. */ + CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94, /**< ::CU_STREAM_WAIT_VALUE_NOR is supported. 
*/ + CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95, /**< Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel */ + CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, /**< Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, /**< Maximum optin shared memory per block */ + CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98, /**< Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. */ + CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99, /**< Device supports host memory registration via ::cudaHostRegister. */ + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, /**< Device accesses pageable memory via the host's page tables. */ + CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, /**< The host can directly access managed memory on the device without migration. */ + CU_DEVICE_ATTRIBUTE_MAX + } CUdevice_attribute; /** * Function cache configurations */ -typedef enum CUfunc_cache_enum { - CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ - CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ - CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ - CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ +typedef enum CUfunc_cache_enum +{ + CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ + CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ + CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ + CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ + } CUfunc_cache; /** * Shared memory configurations */ -typedef enum CUsharedconfig_enum { - CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< set default shared memory bank size */ - CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< set shared memory bank width to four bytes */ - CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< set shared memory bank width to eight bytes */ +typedef enum CUsharedconfig_enum +{ + CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< set default shared memory bank size */ + CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< set shared memory bank width to four bytes */ + CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< set shared memory bank width to eight bytes */ + } CUsharedconfig; /** * Function properties */ -typedef enum CUfunction_attribute_enum { - /** - * The maximum number of threads per block, beyond which a launch of the - * function would fail. This number depends on both the function and the - * device on which the function is currently loaded. - */ - CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, - - /** - * The size in bytes of statically-allocated shared memory required by - * this function. This does not include dynamically-allocated shared - * memory requested by the user at runtime. - */ - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, - - /** - * The size in bytes of user-allocated constant memory required by this - * function. - */ - CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, - - /** - * The size in bytes of local memory used by each thread of this function. 
- */ - CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, - - /** - * The number of registers used by each thread of this function. - */ - CU_FUNC_ATTRIBUTE_NUM_REGS = 4, - - /** - * The PTX virtual architecture version for which the function was - * compiled. This value is the major PTX version * 10 + the minor PTX - * version, so a PTX version 1.3 function would return the value 13. - * Note that this may return the undefined value of 0 for cubins - * compiled prior to CUDA 3.0. - */ - CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, - - /** - * The binary architecture version for which the function was compiled. - * This value is the major binary version * 10 + the minor binary version, - * so a binary version 1.3 function would return the value 13. Note that - * this will return a value of 10 for legacy cubins that do not have a - * properly-encoded binary architecture version. - */ - CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, - - /** - * The attribute to indicate whether the function has been compiled with - * user specified option "-Xptxas --dlcm=ca" set . - */ - CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7, - - /** - * The maximum size in bytes of dynamically-allocated shared memory that can be used by - * this function. If the user-specified dynamic shared memory size is larger than this - * value, the launch will fail. - * See ::cuFuncSetAttribute - */ - CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8, - - /** - * On devices where the L1 cache and shared memory use the same hardware resources, - * this sets the shared memory carveout preference, in percent of the total shared memory. - * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR. - * This is only a hint, and the driver can choose a different ratio if required to execute the function. - * See ::cuFuncSetAttribute - */ - CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9, - - CU_FUNC_ATTRIBUTE_MAX +typedef enum CUfunction_attribute_enum +{ + /** + * The maximum number of threads per block, beyond which a launch of the + * function would fail. This number depends on both the function and the + * device on which the function is currently loaded. + */ + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, + + /** + * The size in bytes of statically-allocated shared memory required by + * this function. This does not include dynamically-allocated shared + * memory requested by the user at runtime. + */ + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, + + /** + * The size in bytes of user-allocated constant memory required by this + * function. + */ + CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, + + /** + * The size in bytes of local memory used by each thread of this function. + */ + CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, + + /** + * The number of registers used by each thread of this function. + */ + CU_FUNC_ATTRIBUTE_NUM_REGS = 4, + + /** + * The PTX virtual architecture version for which the function was + * compiled. This value is the major PTX version * 10 + the minor PTX + * version, so a PTX version 1.3 function would return the value 13. + * Note that this may return the undefined value of 0 for cubins + * compiled prior to CUDA 3.0. + */ + CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, + + /** + * The binary architecture version for which the function was compiled. + * This value is the major binary version * 10 + the minor binary version, + * so a binary version 1.3 function would return the value 13. Note that + * this will return a value of 10 for legacy cubins that do not have a + * properly-encoded binary architecture version. 
+ */ + CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, + + /** + * The attribute to indicate whether the function has been compiled with + * user specified option "-Xptxas --dlcm=ca" set . + */ + CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7, + + /** + * The maximum size in bytes of dynamically-allocated shared memory that can be used by + * this function. If the user-specified dynamic shared memory size is larger than this + * value, the launch will fail. + * See ::cuFuncSetAttribute + */ + CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8, + + /** + * On devices where the L1 cache and shared memory use the same hardware resources, + * this sets the shared memory carveout preference, in percent of the total shared memory. + * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR. + * This is only a hint, and the driver can choose a different ratio if required to execute the function. + * See ::cuFuncSetAttribute + */ + CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9, + + CU_FUNC_ATTRIBUTE_MAX + } CUfunction_attribute; /** * Context creation flags */ -typedef enum CUctx_flags_enum { - CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ - CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ - CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ - CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ - CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling - * \deprecated This flag was deprecated as of CUDA 4.0 - * and was replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. */ - CU_CTX_SCHED_MASK = 0x07, - CU_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ - CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ - CU_CTX_FLAGS_MASK = 0x1f +typedef enum CUctx_flags_enum +{ + CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ + CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ + CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ + CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ + CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling + * \deprecated This flag was deprecated as of CUDA 4.0 + * and was replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. */ + CU_CTX_SCHED_MASK = 0x07, + CU_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ + CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ + CU_CTX_FLAGS_MASK = 0x1f + } CUctx_flags; /** * Stream creation flags */ -typedef enum CUstream_flags_enum { - CU_STREAM_DEFAULT = 0x0, /**< Default stream flag */ - CU_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */ +typedef enum CUstream_flags_enum +{ + CU_STREAM_DEFAULT = 0x0, /**< Default stream flag */ + CU_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */ + } CUstream_flags; /** * Event creation flags */ -typedef enum CUevent_flags_enum { - CU_EVENT_DEFAULT = 0x0, /**< Default event flag */ - CU_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ - CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ - CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. 
CU_EVENT_DISABLE_TIMING must be set */ +typedef enum CUevent_flags_enum +{ + CU_EVENT_DEFAULT = 0x0, /**< Default event flag */ + CU_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ + CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ + CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set */ + } CUevent_flags; typedef enum CUjitInputType_enum { - /** - * Compiled device-class-specific device code\n - * Applicable options: none - */ - CU_JIT_INPUT_CUBIN = 0, - - /** - * PTX source code\n - * Applicable options: PTX compiler options - */ - CU_JIT_INPUT_PTX, - - /** - * Bundle of multiple cubins and/or PTX of some device code\n - * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY - */ - CU_JIT_INPUT_FATBINARY, - - /** - * Host object with embedded device code\n - * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY - */ - CU_JIT_INPUT_OBJECT, - - /** - * Archive of host objects with embedded device code\n - * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY - */ - CU_JIT_INPUT_LIBRARY, - - CU_JIT_NUM_INPUT_TYPES + /** + * Compiled device-class-specific device code\n + * Applicable options: none + */ + CU_JIT_INPUT_CUBIN = 0, + + /** + * PTX source code\n + * Applicable options: PTX compiler options + */ + CU_JIT_INPUT_PTX, + + /** + * Bundle of multiple cubins and/or PTX of some device code\n + * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY + */ + CU_JIT_INPUT_FATBINARY, + + /** + * Host object with embedded device code\n + * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY + */ + CU_JIT_INPUT_OBJECT, + + /** + * Archive of host objects with embedded device code\n + * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY + */ + CU_JIT_INPUT_LIBRARY, + + CU_JIT_NUM_INPUT_TYPES + } CUjitInputType; #ifdef _WIN32 @@ -1119,4 +1136,50 @@ typedef struct hc_cuda_lib typedef hc_cuda_lib_t CUDA_PTR; +int cuda_init (void *hashcat_ctx); +void cuda_close (void *hashcat_ctx); + +int hc_cuCtxCreate (void *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev); +int hc_cuCtxDestroy (void *hashcat_ctx, CUcontext ctx); +int hc_cuCtxSetCurrent (void *hashcat_ctx, CUcontext ctx); +int hc_cuCtxSetCacheConfig (void *hashcat_ctx, CUfunc_cache config); +int hc_cuCtxSynchronize (void *hashcat_ctx); +int hc_cuDeviceGetAttribute (void *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev); +int hc_cuDeviceGetCount (void *hashcat_ctx, int *count); +int hc_cuDeviceGet (void *hashcat_ctx, CUdevice *device, int ordinal); +int hc_cuDeviceGetName (void *hashcat_ctx, char *name, int len, CUdevice dev); +int hc_cuDeviceTotalMem (void *hashcat_ctx, size_t *bytes, CUdevice dev); +int hc_cuDriverGetVersion (void *hashcat_ctx, int *driverVersion); +int hc_cuEventCreate (void *hashcat_ctx, CUevent *phEvent, unsigned int Flags); +int hc_cuEventDestroy (void *hashcat_ctx, CUevent hEvent); +int hc_cuEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd); +int hc_cuEventQuery (void *hashcat_ctx, CUevent hEvent); +int hc_cuEventRecord (void *hashcat_ctx, CUevent hEvent, CUstream hStream); +int hc_cuEventSynchronize (void *hashcat_ctx, CUevent hEvent); +int hc_cuFuncGetAttribute (void *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc); +int hc_cuFuncSetAttribute (void *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value); +int hc_cuInit 
(void *hashcat_ctx, unsigned int Flags);
+int hc_cuLaunchKernel (void *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
+int hc_cuMemAlloc (void *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
+int hc_cuMemcpyDtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+int hc_cuMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+int hc_cuMemcpyHtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
+int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr);
+int hc_cuMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total);
+int hc_cuMemsetD32Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
+int hc_cuMemsetD8Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
+int hc_cuModuleGetFunction (void *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
+int hc_cuModuleGetGlobal (void *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
+int hc_cuModuleLoadDataEx (void *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
+int hc_cuModuleUnload (void *hashcat_ctx, CUmodule hmod);
+int hc_cuStreamCreate (void *hashcat_ctx, CUstream *phStream, unsigned int Flags);
+int hc_cuStreamDestroy (void *hashcat_ctx, CUstream hStream);
+int hc_cuStreamSynchronize (void *hashcat_ctx, CUstream hStream);
+int hc_cuCtxPushCurrent (void *hashcat_ctx, CUcontext ctx);
+int hc_cuCtxPopCurrent (void *hashcat_ctx, CUcontext *pctx);
+int hc_cuLinkCreate (void *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
+int hc_cuLinkAddData (void *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues);
+int hc_cuLinkDestroy (void *hashcat_ctx, CUlinkState state);
+int hc_cuLinkComplete (void *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
+
#endif // _EXT_CUDA_H
diff --git a/include/ext_nvrtc.h b/include/ext_nvrtc.h
index 21485c999..ee46fc35c 100644
--- a/include/ext_nvrtc.h
+++ b/include/ext_nvrtc.h
@@ -84,4 +84,16 @@ typedef hc_nvrtc_lib_t NVRTC_PTR;
int nvrtc_make_options_array_from_string (char *string, char **options);
+int nvrtc_init (void *hashcat_ctx);
+void nvrtc_close (void *hashcat_ctx);
+
+int hc_nvrtcCreateProgram (void *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
+int hc_nvrtcDestroyProgram (void *hashcat_ctx, nvrtcProgram *prog);
+int hc_nvrtcCompileProgram (void *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
+int hc_nvrtcGetProgramLogSize (void *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
+int hc_nvrtcGetProgramLog (void *hashcat_ctx, nvrtcProgram prog, char *log);
+int hc_nvrtcGetPTXSize (void *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
+int hc_nvrtcGetPTX (void *hashcat_ctx, nvrtcProgram prog, char *ptx);
+int hc_nvrtcVersion (void *hashcat_ctx, int *major, int *minor);
+
#endif // _EXT_NVRTC_H
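Every hc_cu* wrapper declared above follows the same pattern, visible in the removed src/backend.c code below: fetch the function pointer from the dynamically loaded library, call it, and translate any non-CUDA_SUCCESS result into an event log message and a -1 return value. A minimal sketch of that shape; hc_cuExample and the cuExample member are placeholders, and the cast of void *hashcat_ctx back to hashcat_ctx_t * is an assumption based on the old hashcat_ctx_t * signatures:

int hc_cuExample (void *hashcat_ctx, CUdevice dev)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;

  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;

  const CUresult CU_err = cuda->cuExample (dev); // hypothetical entry point

  if (CU_err != CUDA_SUCCESS)
  {
    const char *pStr = NULL;

    // prefer the human-readable error string, fall back to the raw code
    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
    {
      event_log_error (hashcat_ctx, "cuExample(): %s", pStr);
    }
    else
    {
      event_log_error (hashcat_ctx, "cuExample(): %d", CU_err);
    }

    return -1;
  }

  return 0;
}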
diff --git a/src/backend.c b/src/backend.c
index cbc81f84a..ab7b44825 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -755,1504 +755,6 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_dir
  snprintf (cached_file, 255, "%s/kernels/amp_a%u.%s.kernel", cache_dir, attack_kern, device_name_chksum_amp_mp);
}
-// NVRTC
-
-int nvrtc_init (hashcat_ctx_t *hashcat_ctx)
-{
- backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
- NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
-
- memset (nvrtc, 0, sizeof (NVRTC_PTR));
-
- #if defined (_WIN)
- nvrtc->lib = hc_dlopen ("nvrtc.dll");
-
- if (nvrtc->lib == NULL)
- {
- // super annoying: nvidia is using the CUDA version in nvrtc???.dll filename!
- // however, the cuda version string comes from nvcuda.dll which is from nvidia driver, but
- // the driver version and the installed CUDA toolkit version can be different, so it cannot be used as a reference.
- // brute force to the rescue
-
- char dllname[100];
-
- for (int major = 20; major >= 9; major--) // older than 3.x do not ship _v2 functions anyway
- // older than 7.x does not support sm 5.x
- // older than 8.x does not have documentation archive online, no way to check if nvrtc support whatever we need
- // older than 9.x is just a theoretical limit since we define 9.0 as the minimum required version
- {
- for (int minor = 20; minor >= 0; minor--)
- {
- snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d.dll", major, minor);
-
- nvrtc->lib = hc_dlopen (dllname);
-
- if (nvrtc->lib) break;
-
- snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d_0.dll", major, minor);
-
- nvrtc->lib = hc_dlopen (dllname);
-
- if (nvrtc->lib) break;
- }
-
- if (nvrtc->lib) break;
- }
- }
- #elif defined (__APPLE__)
- nvrtc->lib = hc_dlopen ("nvrtc.dylib");
- #elif defined (__CYGWIN__)
- nvrtc->lib = hc_dlopen ("nvrtc.dll");
- #else
- nvrtc->lib = hc_dlopen ("libnvrtc.so");
-
- if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("libnvrtc.so.1");
- #endif
-
- if (nvrtc->lib == NULL) return -1;
-
- HC_LOAD_FUNC (nvrtc, nvrtcAddNameExpression, NVRTC_NVRTCADDNAMEEXPRESSION, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcCompileProgram, NVRTC_NVRTCCOMPILEPROGRAM, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcCreateProgram, NVRTC_NVRTCCREATEPROGRAM, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcDestroyProgram, NVRTC_NVRTCDESTROYPROGRAM, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetLoweredName, NVRTC_NVRTCGETLOWEREDNAME, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetPTX, NVRTC_NVRTCGETPTX, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetPTXSize, NVRTC_NVRTCGETPTXSIZE, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLog, NVRTC_NVRTCGETPROGRAMLOG, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLogSize, NVRTC_NVRTCGETPROGRAMLOGSIZE, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcGetErrorString, NVRTC_NVRTCGETERRORSTRING, NVRTC, 1);
- HC_LOAD_FUNC (nvrtc, nvrtcVersion, NVRTC_NVRTCVERSION, NVRTC, 1);
-
- return 0;
-}
-
-void nvrtc_close (hashcat_ctx_t *hashcat_ctx)
-{
- backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
- NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
-
- if (nvrtc)
- {
- if (nvrtc->lib)
- {
- hc_dlclose (nvrtc->lib);
- }
-
- hcfree (backend_ctx->nvrtc);
-
- backend_ctx->nvrtc = NULL;
- }
-}
-
-int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames)
-{
- backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
-
- NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
-
- const nvrtcResult
NVRTC_err = nvrtc->nvrtcCreateProgram (prog, src, name, numHeaders, headers, includeNames); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcCreateProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcDestroyProgram (prog); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcDestroyProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcCompileProgram (prog, numOptions, options); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcCompileProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcGetProgramLogSize (prog, logSizeRet); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcGetProgramLogSize(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcGetProgramLog (prog, log); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcGetProgramLog(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcGetPTXSize (prog, ptxSizeRet); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcGetPTXSize(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcGetPTX (prog, ptx); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcGetPTX(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc; - - const nvrtcResult NVRTC_err = nvrtc->nvrtcVersion (major, minor); - - if (NVRTC_err != NVRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "nvrtcVersion(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err)); - - return -1; - } - - return 0; -} - -// CUDA - -int cuda_init (hashcat_ctx_t *hashcat_ctx) -{ - 
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - memset (cuda, 0, sizeof (CUDA_PTR)); - - #if defined (_WIN) - cuda->lib = hc_dlopen ("nvcuda.dll"); - #elif defined (__APPLE__) - cuda->lib = hc_dlopen ("nvcuda.dylib"); - #elif defined (__CYGWIN__) - cuda->lib = hc_dlopen ("nvcuda.dll"); - #else - cuda->lib = hc_dlopen ("libcuda.so"); - - if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1"); - #endif - - if (cuda->lib == NULL) return -1; - - #define HC_LOAD_FUNC_CUDA(ptr,name,cudaname,type,libname,noerr) \ - do { \ - ptr->name = (type) hc_dlsym ((ptr)->lib, #cudaname); \ - if ((noerr) != -1) { \ - if (!(ptr)->name) { \ - if ((noerr) == 1) { \ - event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ - return -1; \ - } \ - if ((noerr) != 1) { \ - event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ - return 0; \ - } \ - } \ - } \ - } while (0) - - // finding the right symbol is a PITA, because of the _v2 suffix - // a good reference is cuda.h itself - // this needs to be verified for each new cuda release - - HC_LOAD_FUNC_CUDA (cuda, cuCtxCreate, cuCtxCreate_v2, CUDA_CUCTXCREATE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxDestroy, cuCtxDestroy_v2, CUDA_CUCTXDESTROY, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCacheConfig, cuCtxGetCacheConfig, CUDA_CUCTXGETCACHECONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCurrent, cuCtxGetCurrent, CUDA_CUCTXGETCURRENT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxGetSharedMemConfig, cuCtxGetSharedMemConfig, CUDA_CUCTXGETSHAREDMEMCONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxPopCurrent, cuCtxPopCurrent_v2, CUDA_CUCTXPOPCURRENT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxPushCurrent, cuCtxPushCurrent_v2, CUDA_CUCTXPUSHCURRENT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCacheConfig, cuCtxSetCacheConfig, CUDA_CUCTXSETCACHECONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCurrent, cuCtxSetCurrent, CUDA_CUCTXSETCURRENT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxSetSharedMemConfig, cuCtxSetSharedMemConfig, CUDA_CUCTXSETSHAREDMEMCONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuCtxSynchronize, cuCtxSynchronize, CUDA_CUCTXSYNCHRONIZE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetAttribute, cuDeviceGetAttribute, CUDA_CUDEVICEGETATTRIBUTE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetCount, cuDeviceGetCount, CUDA_CUDEVICEGETCOUNT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDeviceGet, cuDeviceGet, CUDA_CUDEVICEGET, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetName, cuDeviceGetName, CUDA_CUDEVICEGETNAME, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDeviceTotalMem, cuDeviceTotalMem_v2, CUDA_CUDEVICETOTALMEM, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuDriverGetVersion, cuDriverGetVersion, CUDA_CUDRIVERGETVERSION, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventCreate, cuEventCreate, CUDA_CUEVENTCREATE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventDestroy, cuEventDestroy_v2, CUDA_CUEVENTDESTROY, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventElapsedTime, cuEventElapsedTime, CUDA_CUEVENTELAPSEDTIME, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventQuery, cuEventQuery, CUDA_CUEVENTQUERY, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventRecord, cuEventRecord, CUDA_CUEVENTRECORD, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuEventSynchronize, cuEventSynchronize, CUDA_CUEVENTSYNCHRONIZE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuFuncGetAttribute, cuFuncGetAttribute, CUDA_CUFUNCGETATTRIBUTE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuFuncSetAttribute, 
cuFuncSetAttribute, CUDA_CUFUNCSETATTRIBUTE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuFuncSetCacheConfig, cuFuncSetCacheConfig, CUDA_CUFUNCSETCACHECONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuFuncSetSharedMemConfig, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuGetErrorName, cuGetErrorName, CUDA_CUGETERRORNAME, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuGetErrorString, cuGetErrorString, CUDA_CUGETERRORSTRING, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuInit, cuInit, CUDA_CUINIT, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuLaunchKernel, cuLaunchKernel, CUDA_CULAUNCHKERNEL, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemAlloc, cuMemAlloc_v2, CUDA_CUMEMALLOC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemAllocHost, cuMemAllocHost_v2, CUDA_CUMEMALLOCHOST, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoDAsync, cuMemcpyDtoDAsync_v2, CUDA_CUMEMCPYDTODASYNC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoHAsync, cuMemcpyDtoHAsync_v2, CUDA_CUMEMCPYDTOHASYNC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoDAsync, cuMemcpyHtoDAsync_v2, CUDA_CUMEMCPYHTODASYNC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemFree, cuMemFree_v2, CUDA_CUMEMFREE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost, cuMemFreeHost, CUDA_CUMEMFREEHOST, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo, cuMemGetInfo_v2, CUDA_CUMEMGETINFO, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async, cuMemsetD32Async, CUDA_CUMEMSETD32ASYNC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async, cuMemsetD8Async, CUDA_CUMEMSETD8ASYNC, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction, cuModuleGetFunction, CUDA_CUMODULEGETFUNCTION, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal, cuModuleGetGlobal_v2, CUDA_CUMODULEGETGLOBAL, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad, cuModuleLoad, CUDA_CUMODULELOAD, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadData, cuModuleLoadData, CUDA_CUMODULELOADDATA, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadDataEx, cuModuleLoadDataEx, CUDA_CUMODULELOADDATAEX, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuModuleUnload, cuModuleUnload, CUDA_CUMODULEUNLOAD, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuProfilerStart, cuProfilerStart, CUDA_CUPROFILERSTART, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuProfilerStop, cuProfilerStop, CUDA_CUPROFILERSTOP, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuStreamCreate, cuStreamCreate, CUDA_CUSTREAMCREATE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuStreamDestroy, cuStreamDestroy_v2, CUDA_CUSTREAMDESTROY, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuStreamSynchronize, cuStreamSynchronize, CUDA_CUSTREAMSYNCHRONIZE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuStreamWaitEvent, cuStreamWaitEvent, CUDA_CUSTREAMWAITEVENT, CUDA, 1); - #if defined (WITH_CUBIN) - HC_LOAD_FUNC_CUDA (cuda, cuLinkCreate, cuLinkCreate_v2, CUDA_CULINKCREATE, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuLinkAddData, cuLinkAddData_v2, CUDA_CULINKADDDATA, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuLinkDestroy, cuLinkDestroy, CUDA_CULINKDESTROY, CUDA, 1); - HC_LOAD_FUNC_CUDA (cuda, cuLinkComplete, cuLinkComplete, CUDA_CULINKCOMPLETE, CUDA, 1); - #endif - - return 0; -} - -void cuda_close (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - if (cuda) - { - if (cuda->lib) - { - hc_dlclose (cuda->lib); - } - - hcfree (backend_ctx->cuda); - - backend_ctx->cuda = NULL; - } -} - -int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = 
(CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuInit (Flags); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuInit(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuInit(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDeviceGetAttribute (pi, attrib, dev); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDeviceGetCount (count); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDeviceGetCount(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDeviceGetCount(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice* device, int ordinal) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDeviceGet (device, ordinal); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDeviceGet(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDeviceGet(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDeviceGetName (name, len, dev); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDeviceGetName(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDeviceGetName(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDeviceTotalMem (bytes, dev); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuDriverGetVersion 
(driverVersion); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuDriverGetVersion(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuDriverGetVersion(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxCreate (pctx, flags, dev); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxCreate(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxDestroy (ctx); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxDestroy(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuModuleLoadDataEx (module, image, numOptions, options, optionValues); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuModuleUnload (hmod); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuModuleUnload(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuModuleUnload(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxSetCurrent (ctx); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemAlloc (dptr, bytesize); - - if 
(CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemAlloc(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemAlloc(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemFree (dptr); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemFree(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemFree(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemsetD32Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemsetD32Async (dstDevice, ui, N, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemsetD32Async(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemsetD32Async(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemsetD8Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned char 
uc, size_t N, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemsetD8Async (dstDevice, uc, N, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemsetD8Async(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemsetD8Async(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuModuleGetFunction (hfunc, hmod, name); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuModuleGetFunction(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuModuleGetFunction(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuModuleGetGlobal (dptr, bytes, hmod, name); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuMemGetInfo (free, total); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuMemGetInfo(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuMemGetInfo(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuFuncGetAttribute (pi, attrib, hfunc); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuFuncSetAttribute (hfunc, attrib, value); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, 
"cuFuncSetAttribute(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuStreamCreate (phStream, Flags); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuStreamCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuStreamCreate(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuStreamDestroy (hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuStreamDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuStreamDestroy(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuStreamSynchronize (hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuStreamSynchronize(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuStreamSynchronize(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuLaunchKernel(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuLaunchKernel(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxSynchronize (); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxSynchronize(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxSynchronize(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventCreate (phEvent, Flags); - - if (CU_err != CUDA_SUCCESS) - { - const char 
*pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventCreate(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventDestroy (hEvent); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventDestroy(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventElapsedTime (pMilliseconds, hStart, hEnd); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventElapsedTime(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventElapsedTime(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventQuery (hEvent); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventQuery(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventQuery(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventRecord (hEvent, hStream); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventRecord(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventRecord(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuEventSynchronize (hEvent); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuEventSynchronize(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuEventSynchronize(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxSetCacheConfig (config); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error 
(hashcat_ctx, "cuCtxSetCacheConfig(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxPushCurrent (ctx); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxPushCurrent(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxPushCurrent(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuCtxPopCurrent (pctx); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuCtxPopCurrent(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuCtxPopCurrent(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuLinkCreate (numOptions, options, optionValues, stateOut); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuLinkCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuLinkCreate(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuLinkAddData (state, type, data, size, name, numOptions, options, optionValues); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuLinkAddData(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuLinkAddData(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuLinkDestroy (state); - - if (CU_err != CUDA_SUCCESS) - { - const char *pStr = NULL; - - if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) - { - event_log_error (hashcat_ctx, "cuLinkDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "cuLinkDestroy(): %d", CU_err); - } - - return -1; - } - - return 0; -} - -int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; - - const CUresult CU_err = cuda->cuLinkComplete (state, cubinOut, sizeOut); - 
-  if (CU_err != CUDA_SUCCESS)
-  {
-    const char *pStr = NULL;
-
-    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
-    {
-      event_log_error (hashcat_ctx, "cuLinkComplete(): %s", pStr);
-    }
-    else
-    {
-      event_log_error (hashcat_ctx, "cuLinkComplete(): %d", CU_err);
-    }
-
-    return -1;
-  }
-
-  return 0;
-}
-
-// Backend
-
 int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw)
 {
   pw_idx_t pw_idx;
diff --git a/src/ext_cuda.c b/src/ext_cuda.c
index dc43e1b61..dbafa065e 100644
--- a/src/ext_cuda.c
+++ b/src/ext_cuda.c
@@ -5,4 +5,1267 @@
 #include "common.h"
 #include "types.h"
+#include "memory.h"
+#include "event.h"
 #include "ext_cuda.h"
+
+#include "dynloader.h"
+
+int cuda_init (void *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  memset (cuda, 0, sizeof (CUDA_PTR));
+
+  #if defined (_WIN)
+  cuda->lib = hc_dlopen ("nvcuda.dll");
+  #elif defined (__APPLE__)
+  cuda->lib = hc_dlopen ("nvcuda.dylib");
+  #elif defined (__CYGWIN__)
+  cuda->lib = hc_dlopen ("nvcuda.dll");
+  #else
+  cuda->lib = hc_dlopen ("libcuda.so");
+
+  if (cuda->lib == NULL) cuda->lib = hc_dlopen ("libcuda.so.1");
+  #endif
+
+  if (cuda->lib == NULL) return -1;
+
+  #define HC_LOAD_FUNC_CUDA(ptr,name,cudaname,type,libname,noerr) \
+    do { \
+      ptr->name = (type) hc_dlsym ((ptr)->lib, #cudaname); \
+      if ((noerr) != -1) { \
+        if (!(ptr)->name) { \
+          if ((noerr) == 1) { \
+            event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+            return -1; \
+          } \
+          if ((noerr) != 1) { \
+            event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \
+            return 0; \
+          } \
+        } \
+      } \
+    } while (0)
+
+  // finding the right symbol is a PITA, because of the _v2 suffix
+  // a good reference is cuda.h itself
+  // this needs to be verified for each new cuda release
+
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxCreate, cuCtxCreate_v2, CUDA_CUCTXCREATE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxDestroy, cuCtxDestroy_v2, CUDA_CUCTXDESTROY, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCacheConfig, cuCtxGetCacheConfig, CUDA_CUCTXGETCACHECONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetCurrent, cuCtxGetCurrent, CUDA_CUCTXGETCURRENT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxGetSharedMemConfig, cuCtxGetSharedMemConfig, CUDA_CUCTXGETSHAREDMEMCONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxPopCurrent, cuCtxPopCurrent_v2, CUDA_CUCTXPOPCURRENT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxPushCurrent, cuCtxPushCurrent_v2, CUDA_CUCTXPUSHCURRENT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCacheConfig, cuCtxSetCacheConfig, CUDA_CUCTXSETCACHECONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetCurrent, cuCtxSetCurrent, CUDA_CUCTXSETCURRENT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSetSharedMemConfig, cuCtxSetSharedMemConfig, CUDA_CUCTXSETSHAREDMEMCONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuCtxSynchronize, cuCtxSynchronize, CUDA_CUCTXSYNCHRONIZE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetAttribute, cuDeviceGetAttribute, CUDA_CUDEVICEGETATTRIBUTE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetCount, cuDeviceGetCount, CUDA_CUDEVICEGETCOUNT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGet, cuDeviceGet, CUDA_CUDEVICEGET, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceGetName, cuDeviceGetName, CUDA_CUDEVICEGETNAME, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDeviceTotalMem, cuDeviceTotalMem_v2, CUDA_CUDEVICETOTALMEM, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuDriverGetVersion, cuDriverGetVersion, CUDA_CUDRIVERGETVERSION, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventCreate, cuEventCreate, CUDA_CUEVENTCREATE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventDestroy, cuEventDestroy_v2, CUDA_CUEVENTDESTROY, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventElapsedTime, cuEventElapsedTime, CUDA_CUEVENTELAPSEDTIME, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventQuery, cuEventQuery, CUDA_CUEVENTQUERY, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventRecord, cuEventRecord, CUDA_CUEVENTRECORD, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuEventSynchronize, cuEventSynchronize, CUDA_CUEVENTSYNCHRONIZE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncGetAttribute, cuFuncGetAttribute, CUDA_CUFUNCGETATTRIBUTE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetAttribute, cuFuncSetAttribute, CUDA_CUFUNCSETATTRIBUTE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetCacheConfig, cuFuncSetCacheConfig, CUDA_CUFUNCSETCACHECONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuFuncSetSharedMemConfig, cuFuncSetSharedMemConfig, CUDA_CUFUNCSETSHAREDMEMCONFIG, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuGetErrorName, cuGetErrorName, CUDA_CUGETERRORNAME, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuGetErrorString, cuGetErrorString, CUDA_CUGETERRORSTRING, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuInit, cuInit, CUDA_CUINIT, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuLaunchKernel, cuLaunchKernel, CUDA_CULAUNCHKERNEL, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemAlloc, cuMemAlloc_v2, CUDA_CUMEMALLOC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemAllocHost, cuMemAllocHost_v2, CUDA_CUMEMALLOCHOST, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoDAsync, cuMemcpyDtoDAsync_v2, CUDA_CUMEMCPYDTODASYNC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoHAsync, cuMemcpyDtoHAsync_v2, CUDA_CUMEMCPYDTOHASYNC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoDAsync, cuMemcpyHtoDAsync_v2, CUDA_CUMEMCPYHTODASYNC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemFree, cuMemFree_v2, CUDA_CUMEMFREE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost, cuMemFreeHost, CUDA_CUMEMFREEHOST, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo, cuMemGetInfo_v2, CUDA_CUMEMGETINFO, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async, cuMemsetD32Async, CUDA_CUMEMSETD32ASYNC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async, cuMemsetD8Async, CUDA_CUMEMSETD8ASYNC, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction, cuModuleGetFunction, CUDA_CUMODULEGETFUNCTION, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal, cuModuleGetGlobal_v2, CUDA_CUMODULEGETGLOBAL, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad, cuModuleLoad, CUDA_CUMODULELOAD, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadData, cuModuleLoadData, CUDA_CUMODULELOADDATA, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleLoadDataEx, cuModuleLoadDataEx, CUDA_CUMODULELOADDATAEX, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuModuleUnload, cuModuleUnload, CUDA_CUMODULEUNLOAD, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuProfilerStart, cuProfilerStart, CUDA_CUPROFILERSTART, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuProfilerStop, cuProfilerStop, CUDA_CUPROFILERSTOP, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamCreate, cuStreamCreate, CUDA_CUSTREAMCREATE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamDestroy, cuStreamDestroy_v2, CUDA_CUSTREAMDESTROY, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamSynchronize, cuStreamSynchronize, CUDA_CUSTREAMSYNCHRONIZE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuStreamWaitEvent, cuStreamWaitEvent, CUDA_CUSTREAMWAITEVENT, CUDA, 1);
+  #if defined (WITH_CUBIN)
+  HC_LOAD_FUNC_CUDA (cuda, cuLinkCreate, cuLinkCreate_v2, CUDA_CULINKCREATE, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuLinkAddData, cuLinkAddData_v2, CUDA_CULINKADDDATA, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuLinkDestroy, cuLinkDestroy, CUDA_CULINKDESTROY, CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuLinkComplete, cuLinkComplete, CUDA_CULINKCOMPLETE, CUDA, 1);
+  #endif
+
+  return 0;
+}
+
+void cuda_close (void *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  if (cuda)
+  {
+    if (cuda->lib)
+    {
+      hc_dlclose (cuda->lib);
+    }
+
+    hcfree (backend_ctx->cuda);
+
+    backend_ctx->cuda = NULL;
+  }
+}
+
+int hc_cuInit (void *hashcat_ctx, unsigned int Flags)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuInit (Flags);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuInit(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuInit(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGetAttribute (void *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceGetAttribute (pi, attrib, dev);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetAttribute(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGetCount (void *hashcat_ctx, int *count)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceGetCount (count);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetCount(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetCount(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGet (void *hashcat_ctx, CUdevice *device, int ordinal)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceGet (device, ordinal);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGet(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGet(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuDeviceGetName (void *hashcat_ctx, char *name, int len, CUdevice dev)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuDeviceGetName (name, len, dev);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetName(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuDeviceGetName(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+ 
return 0; +} + +int hc_cuDeviceTotalMem (void *hashcat_ctx, size_t *bytes, CUdevice dev) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuDeviceTotalMem (bytes, dev); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuDeviceTotalMem(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuDriverGetVersion (void *hashcat_ctx, int *driverVersion) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuDriverGetVersion (driverVersion); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuDriverGetVersion(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuDriverGetVersion(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxCreate (void *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxCreate (pctx, flags, dev); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxCreate(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxDestroy (void *hashcat_ctx, CUcontext ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxDestroy (ctx); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxDestroy(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuModuleLoadDataEx (void *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuModuleLoadDataEx (module, image, numOptions, options, optionValues); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuModuleLoadDataEx(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuModuleUnload (void *hashcat_ctx, CUmodule hmod) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuModuleUnload (hmod); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuModuleUnload(): %s", pStr); + } + else + { + event_log_error 
(hashcat_ctx, "cuModuleUnload(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxSetCurrent (void *hashcat_ctx, CUcontext ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxSetCurrent (ctx); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxSetCurrent(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemAlloc (void *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemAlloc (dptr, bytesize); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemAlloc(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemAlloc(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemFree (dptr); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemFree(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemFree(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemcpyDtoHAsync(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemcpyDtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemcpyDtoDAsync(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemcpyHtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if 
(cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemcpyHtoDAsync(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemsetD32Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemsetD32Async (dstDevice, ui, N, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemsetD32Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemsetD32Async(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemsetD8Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemsetD8Async (dstDevice, uc, N, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemsetD8Async(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemsetD8Async(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuModuleGetFunction (void *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuModuleGetFunction (hfunc, hmod, name); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuModuleGetFunction(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuModuleGetFunction(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuModuleGetGlobal (void *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuModuleGetGlobal (dptr, bytes, hmod, name); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuMemGetInfo (free, total); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuMemGetInfo(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuMemGetInfo(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuFuncGetAttribute (void *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) 
hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuFuncGetAttribute (pi, attrib, hfunc); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuFuncGetAttribute(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuFuncSetAttribute (void *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuFuncSetAttribute (hfunc, attrib, value); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuFuncSetAttribute(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuStreamCreate (void *hashcat_ctx, CUstream *phStream, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuStreamCreate (phStream, Flags); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuStreamCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuStreamCreate(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuStreamDestroy (void *hashcat_ctx, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuStreamDestroy (hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuStreamDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuStreamDestroy(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuStreamSynchronize (void *hashcat_ctx, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuStreamSynchronize (hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuStreamSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuStreamSynchronize(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuLaunchKernel (void *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString 
(CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuLaunchKernel(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuLaunchKernel(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxSynchronize (void *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxSynchronize (); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxSynchronize(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventCreate (void *hashcat_ctx, CUevent *phEvent, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventCreate (phEvent, Flags); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuEventCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventCreate(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventDestroy (void *hashcat_ctx, CUevent hEvent) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventDestroy (hEvent); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuEventDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventDestroy(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventElapsedTime (pMilliseconds, hStart, hEnd); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuEventElapsedTime(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventElapsedTime(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventQuery (void *hashcat_ctx, CUevent hEvent) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventQuery (hEvent); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuEventQuery(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventQuery(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventRecord (void *hashcat_ctx, CUevent hEvent, CUstream hStream) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventRecord (hEvent, hStream); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + 
event_log_error (hashcat_ctx, "cuEventRecord(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventRecord(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuEventSynchronize (void *hashcat_ctx, CUevent hEvent) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuEventSynchronize (hEvent); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuEventSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuEventSynchronize(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxSetCacheConfig (void *hashcat_ctx, CUfunc_cache config) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxSetCacheConfig (config); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxSetCacheConfig(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxPushCurrent (void *hashcat_ctx, CUcontext ctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxPushCurrent (ctx); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxPushCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxPushCurrent(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuCtxPopCurrent (void *hashcat_ctx, CUcontext *pctx) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuCtxPopCurrent (pctx); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuCtxPopCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuCtxPopCurrent(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuLinkCreate (void *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuLinkCreate (numOptions, options, optionValues, stateOut); + + if (CU_err != CUDA_SUCCESS) + { + const char *pStr = NULL; + + if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS) + { + event_log_error (hashcat_ctx, "cuLinkCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "cuLinkCreate(): %d", CU_err); + } + + return -1; + } + + return 0; +} + +int hc_cuLinkAddData (void *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx; + + CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda; + + const CUresult CU_err = cuda->cuLinkAddData (state, 
type, data, size, name, numOptions, options, optionValues);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuLinkAddData(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuLinkAddData(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuLinkDestroy (void *hashcat_ctx, CUlinkState state)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuLinkDestroy (state);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuLinkDestroy(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuLinkDestroy(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_cuLinkComplete (void *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
+
+  const CUresult CU_err = cuda->cuLinkComplete (state, cubinOut, sizeOut);
+
+  if (CU_err != CUDA_SUCCESS)
+  {
+    const char *pStr = NULL;
+
+    if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
+    {
+      event_log_error (hashcat_ctx, "cuLinkComplete(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "cuLinkComplete(): %d", CU_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/src/ext_nvrtc.c b/src/ext_nvrtc.c
index 91d68c0bd..ed4ce358c 100644
--- a/src/ext_nvrtc.c
+++ b/src/ext_nvrtc.c
@@ -5,8 +5,12 @@
 #include "common.h"
 #include "types.h"
+#include "memory.h"
+#include "event.h"
 #include "ext_nvrtc.h"
 
+#include "dynloader.h"
+
 int nvrtc_make_options_array_from_string (char *string, char **options)
 {
   char *saveptr = NULL;
 
@@ -25,3 +29,238 @@ int nvrtc_make_options_array_from_string (char *string, char **options)
 
   return cnt;
 }
+
+// NVRTC
+
+int nvrtc_init (void *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  memset (nvrtc, 0, sizeof (NVRTC_PTR));
+
+  #if defined (_WIN)
+  nvrtc->lib = hc_dlopen ("nvrtc.dll");
+
+  if (nvrtc->lib == NULL)
+  {
+    // super annoying: nvidia encodes the CUDA version in the nvrtc???.dll filename!
+    // however, the cuda version string comes from nvcuda.dll, which ships with the nvidia driver,
+    // and the driver version and the installed CUDA toolkit version can differ, so it cannot be used as a reference.
+    // brute force to the rescue
+
+    char dllname[100];
+
+    for (int major = 20; major >= 9; major--) // older than 3.x does not ship _v2 functions anyway
+                                              // older than 7.x does not support sm 5.x
+                                              // older than 8.x has no documentation archive online, so there is no way to check whether nvrtc supports whatever we need
+                                              // older than 9.x is just a theoretical limit, since we define 9.0 as the minimum required version
+    {
+      for (int minor = 20; minor >= 0; minor--)
+      {
+        snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d.dll", major, minor);
+
+        nvrtc->lib = hc_dlopen (dllname);
+
+        if (nvrtc->lib) break;
+
+        snprintf (dllname, sizeof (dllname), "nvrtc64_%d%d_0.dll", major, minor);
+
+        nvrtc->lib = hc_dlopen (dllname);
+
+        if (nvrtc->lib) break;
+      }
+
+      if (nvrtc->lib) break;
+    }
+  }
+  #elif defined (__APPLE__)
+  nvrtc->lib = hc_dlopen ("nvrtc.dylib");
+  #elif defined (__CYGWIN__)
+  nvrtc->lib = hc_dlopen ("nvrtc.dll");
+  #else
+  nvrtc->lib = hc_dlopen ("libnvrtc.so");
+
+  if (nvrtc->lib == NULL) nvrtc->lib = hc_dlopen ("libnvrtc.so.1");
+  #endif
+
+  if (nvrtc->lib == NULL) return -1;
+
+  HC_LOAD_FUNC (nvrtc, nvrtcAddNameExpression, NVRTC_NVRTCADDNAMEEXPRESSION, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcCompileProgram, NVRTC_NVRTCCOMPILEPROGRAM, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcCreateProgram, NVRTC_NVRTCCREATEPROGRAM, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcDestroyProgram, NVRTC_NVRTCDESTROYPROGRAM, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetLoweredName, NVRTC_NVRTCGETLOWEREDNAME, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetPTX, NVRTC_NVRTCGETPTX, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetPTXSize, NVRTC_NVRTCGETPTXSIZE, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLog, NVRTC_NVRTCGETPROGRAMLOG, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetProgramLogSize, NVRTC_NVRTCGETPROGRAMLOGSIZE, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcGetErrorString, NVRTC_NVRTCGETERRORSTRING, NVRTC, 1);
+  HC_LOAD_FUNC (nvrtc, nvrtcVersion, NVRTC_NVRTCVERSION, NVRTC, 1);
+
+  return 0;
+}
+
+void nvrtc_close (void *hashcat_ctx)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  if (nvrtc)
+  {
+    if (nvrtc->lib)
+    {
+      hc_dlclose (nvrtc->lib);
+    }
+
+    hcfree (backend_ctx->nvrtc);
+
+    backend_ctx->nvrtc = NULL;
+  }
+}
+
+int hc_nvrtcCreateProgram (void *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcCreateProgram (prog, src, name, numHeaders, headers, includeNames);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcCreateProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcDestroyProgram (void *hashcat_ctx, nvrtcProgram *prog)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcDestroyProgram (prog);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcDestroyProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcCompileProgram (void *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcCompileProgram (prog, numOptions, options);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcCompileProgram(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcGetProgramLogSize (void *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcGetProgramLogSize (prog, logSizeRet);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcGetProgramLogSize(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcGetProgramLog (void *hashcat_ctx, nvrtcProgram prog, char *log)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcGetProgramLog (prog, log);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcGetProgramLog(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcGetPTXSize (void *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcGetPTXSize (prog, ptxSizeRet);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcGetPTXSize(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcGetPTX (void *hashcat_ctx, nvrtcProgram prog, char *ptx)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcGetPTX (prog, ptx);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcGetPTX(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_nvrtcVersion (void *hashcat_ctx, int *major, int *minor)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  NVRTC_PTR *nvrtc = (NVRTC_PTR *) backend_ctx->nvrtc;
+
+  const nvrtcResult NVRTC_err = nvrtc->nvrtcVersion (major, minor);
+
+  if (NVRTC_err != NVRTC_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "nvrtcVersion(): %s", nvrtc->nvrtcGetErrorString (NVRTC_err));
+
+    return -1;
+  }
+
+  return 0;
+}
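
The two diffs above follow one pattern throughout: the CUDA driver library is never linked at build time but loaded at runtime, each driver entry point is looked up by its exported (often _v2-suffixed) ABI name, and every CUresult is funneled through a single logging path that prefers cuGetErrorString() and normalizes failures to -1. The following standalone sketch is not hashcat code: it assumes a 64-bit Linux host with the NVIDIA driver installed, uses plain POSIX dlfcn instead of hashcat's hc_dlopen/hc_dlsym wrappers, and substitutes an ABI-compatible int for the CUresult enum so it compiles without cuda.h.

/*
 * Standalone sketch, not hashcat code. Assumes 64-bit Linux with the
 * NVIDIA driver present. Build with: cc demo.c -ldl
 */

#include <stdio.h>
#include <stddef.h>
#include <dlfcn.h>

typedef int                CUresult;    /* stand-in for the cuda.h enum (ABI assumption) */
typedef unsigned long long CUdeviceptr; /* 64-bit device pointer */

typedef CUresult (*cuInit_t)             (unsigned int);
typedef CUresult (*cuDriverGetVersion_t) (int *);
typedef CUresult (*cuMemAlloc_t)         (CUdeviceptr *, size_t);
typedef CUresult (*cuGetErrorString_t)   (CUresult, const char **);

static cuGetErrorString_t cuGetErrorString;

/* hc_cu*-style wrapper: prefer the driver's error string, fall back to the
   numeric code, normalize to -1/0 */
static int check (const char *fn, CUresult rc)
{
  if (rc == 0) return 0; /* CUDA_SUCCESS */

  const char *s = NULL;

  if (cuGetErrorString && cuGetErrorString (rc, &s) == 0)
  {
    fprintf (stderr, "%s(): %s\n", fn, s);
  }
  else
  {
    fprintf (stderr, "%s(): %d\n", fn, rc);
  }

  return -1;
}

int main (void)
{
  /* same fallback order as cuda_init() above */
  void *lib = dlopen ("libcuda.so", RTLD_NOW);

  if (lib == NULL) lib = dlopen ("libcuda.so.1", RTLD_NOW);
  if (lib == NULL) { fprintf (stderr, "cannot load libcuda\n"); return 1; }

  cuInit_t             cuInit             = (cuInit_t)             dlsym (lib, "cuInit");
  cuDriverGetVersion_t cuDriverGetVersion = (cuDriverGetVersion_t) dlsym (lib, "cuDriverGetVersion");

  /* the exported name carries the _v2 suffix, exactly like the
     HC_LOAD_FUNC_CUDA table maps cuMemAlloc -> cuMemAlloc_v2 */
  cuMemAlloc_t cuMemAlloc = (cuMemAlloc_t) dlsym (lib, "cuMemAlloc_v2");

  cuGetErrorString = (cuGetErrorString_t) dlsym (lib, "cuGetErrorString");

  /* mandatory symbols: fail hard when absent, like HC_LOAD_FUNC_CUDA with noerr == 1 */
  if (!cuInit || !cuDriverGetVersion || !cuMemAlloc) { dlclose (lib); return 1; }

  if (check ("cuInit", cuInit (0)) == 0)
  {
    int v = 0;

    if (check ("cuDriverGetVersion", cuDriverGetVersion (&v)) == 0)
    {
      /* the driver encodes the version as 1000 * major + 10 * minor */
      printf ("driver %d.%d\n", v / 1000, (v % 1000) / 10);
    }
  }

  /* cuMemAlloc is resolved but deliberately not called: a real allocation
     would first need a current context (cuCtxCreate) */

  dlclose (lib);

  return 0;
}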
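
On the NVRTC side the wrappers compose in a fixed order: create the program, compile it, fetch the compile log (size first, then buffer), and on success fetch the PTX the same way, which is why nvrtcGetProgramLogSize/nvrtcGetPTXSize exist as separate wrappers above. A caller-side sketch, again not hashcat code: it links libnvrtc directly through nvrtc.h (assuming an installed CUDA toolkit, built with -lnvrtc) instead of loading it via nvrtc_init, but the call sequence illustrates how the hc_nvrtc* wrappers are meant to be chained.

/*
 * Standalone sketch, not hashcat code. Assumes the CUDA toolkit headers
 * and libnvrtc are installed. Build with: cc demo.c -lnvrtc
 */

#include <stdio.h>
#include <stdlib.h>
#include <nvrtc.h>

int main (void)
{
  const char *src = "extern \"C\" __global__ void k (int *p) { *p = 42; }";

  nvrtcProgram prog;

  if (nvrtcCreateProgram (&prog, src, "k.cu", 0, NULL, NULL) != NVRTC_SUCCESS) return 1;

  const char *opts[] = { "--gpu-architecture=compute_50" };

  const nvrtcResult rc = nvrtcCompileProgram (prog, 1, opts);

  size_t log_size = 0;

  nvrtcGetProgramLogSize (prog, &log_size); /* size first, then buffer */

  if (log_size > 1)
  {
    char *log = (char *) malloc (log_size);

    nvrtcGetProgramLog (prog, log);

    fprintf (stderr, "%s", log);

    free (log);
  }

  if (rc == NVRTC_SUCCESS)
  {
    size_t ptx_size = 0;

    nvrtcGetPTXSize (prog, &ptx_size);

    char *ptx = (char *) malloc (ptx_size);

    nvrtcGetPTX (prog, ptx); /* PTX text, suitable for cuModuleLoadDataEx or cuLinkAddData */

    printf ("compiled %zu bytes of PTX\n", ptx_size);

    free (ptx);
  }

  nvrtcDestroyProgram (&prog);

  return rc == NVRTC_SUCCESS ? 0 : 1;
}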