mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-21 14:08:21 +00:00
Added workaround to get rid of internal runtimes memory leaks
As of now, especially in the benchmark mode, hashcat will not go to create and destroy context and command-queue for each enabled device each time it switches from one hash-mode to the next. Specifically using OpenCL with an NVIDIA device, it was not possible to complete the benchmark because clCreateContext has memory leaks that slowly consume all available GPU memory until hashcat can activate a new context and disable the device. Avoid deprecated HIP functions All hipCtx* features have been declared deprecated, so we have replaced them with the new ones, also fixing a critical bug on handling multiple AMD devices in the same system.
This commit is contained in:
parent
0576c41491
commit
f663abee44
@ -129,6 +129,7 @@
|
|||||||
- Alias Devices: Prevents hashcat, when started with x86_64 emulation on Apple Silicon, from showing the Apple M1 OpenCL CPU as an alias for the Apple M1 Metal GPU
|
- Alias Devices: Prevents hashcat, when started with x86_64 emulation on Apple Silicon, from showing the Apple M1 OpenCL CPU as an alias for the Apple M1 Metal GPU
|
||||||
- Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
|
- Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
|
||||||
- Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
|
- Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
|
||||||
|
- Backend: Added workaround to get rid of internal runtimes memory leaks
|
||||||
- Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
|
- Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
|
||||||
- Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
|
- Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
|
||||||
- Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
|
- Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
|
||||||
@ -141,6 +142,7 @@
|
|||||||
- Building: Support building windows binaries on macOS using MinGW
|
- Building: Support building windows binaries on macOS using MinGW
|
||||||
- Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
|
- Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
|
||||||
- Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
|
- Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
|
||||||
|
- HIP Backend: Avoid deprecated functions
|
||||||
- Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
|
- Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
|
||||||
- Modules: Updated module_unstable_warning
|
- Modules: Updated module_unstable_warning
|
||||||
- Open Document Format: Added support for small documents with content length < 1024
|
- Open Document Format: Added support for small documents with content length < 1024
|
||||||
|
@ -14,18 +14,20 @@
|
|||||||
|
|
||||||
typedef void *hipDeviceptr_t;
|
typedef void *hipDeviceptr_t;
|
||||||
|
|
||||||
typedef enum hipFunction_attribute {
|
typedef enum hipFunction_attribute
|
||||||
HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, ///< The maximum number of threads per block. Depends on function and device.
|
{
|
||||||
HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, ///< The statically allocated shared memory size in bytes per block required by the function.
|
HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, // The maximum number of threads per block. Depends on function and device.
|
||||||
HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, ///< The user-allocated constant memory by the function in bytes.
|
HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, // The statically allocated shared memory size in bytes per block required by the function.
|
||||||
HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, ///< The local memory usage of each thread by this function in bytes.
|
HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, // The user-allocated constant memory by the function in bytes.
|
||||||
HIP_FUNC_ATTRIBUTE_NUM_REGS, ///< The number of registers used by each thread of this function.
|
HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, // The local memory usage of each thread by this function in bytes.
|
||||||
HIP_FUNC_ATTRIBUTE_PTX_VERSION, ///< PTX version
|
HIP_FUNC_ATTRIBUTE_NUM_REGS, // The number of registers used by each thread of this function.
|
||||||
HIP_FUNC_ATTRIBUTE_BINARY_VERSION, ///< Binary version
|
HIP_FUNC_ATTRIBUTE_PTX_VERSION, // PTX version
|
||||||
HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, ///< Cache mode
|
HIP_FUNC_ATTRIBUTE_BINARY_VERSION, // Binary version
|
||||||
HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, ///< The maximum dynamic shared memory per block for this function in bytes.
|
HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, // Cache mode
|
||||||
HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference in percent of the maximum shared memory.
|
HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, // The maximum dynamic shared memory per block for this function in bytes.
|
||||||
|
HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, // The shared memory carveout preference in percent of the maximum shared memory.
|
||||||
HIP_FUNC_ATTRIBUTE_MAX
|
HIP_FUNC_ATTRIBUTE_MAX
|
||||||
|
|
||||||
} hipFunction_attribute;
|
} hipFunction_attribute;
|
||||||
|
|
||||||
// stop: driver_types.h
|
// stop: driver_types.h
|
||||||
@ -47,13 +49,14 @@ typedef struct ihipModuleSymbol_t* hipFunction_t;
|
|||||||
#define __HIP_NODISCARD
|
#define __HIP_NODISCARD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef enum __HIP_NODISCARD hipError_t {
|
typedef enum __HIP_NODISCARD hipError_t
|
||||||
hipSuccess = 0, ///< Successful completion.
|
{
|
||||||
hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL
|
hipSuccess = 0, // Successful completion.
|
||||||
///< or not in an acceptable range.
|
hipErrorInvalidValue = 1, // One or more of the parameters passed to the API call is NULL
|
||||||
|
// or not in an acceptable range.
|
||||||
hipErrorOutOfMemory = 2,
|
hipErrorOutOfMemory = 2,
|
||||||
// Deprecated
|
// Deprecated
|
||||||
hipErrorMemoryAllocation = 2, ///< Memory allocation error.
|
hipErrorMemoryAllocation = 2, // Memory allocation error.
|
||||||
hipErrorNotInitialized = 3,
|
hipErrorNotInitialized = 3,
|
||||||
// Deprecated
|
// Deprecated
|
||||||
hipErrorInitializationError = 3,
|
hipErrorInitializationError = 3,
|
||||||
@ -65,20 +68,20 @@ typedef enum __HIP_NODISCARD hipError_t {
|
|||||||
hipErrorInvalidConfiguration = 9,
|
hipErrorInvalidConfiguration = 9,
|
||||||
hipErrorInvalidPitchValue = 12,
|
hipErrorInvalidPitchValue = 12,
|
||||||
hipErrorInvalidSymbol = 13,
|
hipErrorInvalidSymbol = 13,
|
||||||
hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer
|
hipErrorInvalidDevicePointer = 17, // Invalid Device Pointer
|
||||||
hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction
|
hipErrorInvalidMemcpyDirection = 21, // Invalid memory copy direction
|
||||||
hipErrorInsufficientDriver = 35,
|
hipErrorInsufficientDriver = 35,
|
||||||
hipErrorMissingConfiguration = 52,
|
hipErrorMissingConfiguration = 52,
|
||||||
hipErrorPriorLaunchFailure = 53,
|
hipErrorPriorLaunchFailure = 53,
|
||||||
hipErrorInvalidDeviceFunction = 98,
|
hipErrorInvalidDeviceFunction = 98,
|
||||||
hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices
|
hipErrorNoDevice = 100, // Call to hipGetDeviceCount returned 0 devices
|
||||||
hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices.
|
hipErrorInvalidDevice = 101, // DeviceID must be in range 0...#compute-devices.
|
||||||
hipErrorInvalidImage = 200,
|
hipErrorInvalidImage = 200,
|
||||||
hipErrorInvalidContext = 201, ///< Produced when input context is invalid.
|
hipErrorInvalidContext = 201, // Produced when input context is invalid.
|
||||||
hipErrorContextAlreadyCurrent = 202,
|
hipErrorContextAlreadyCurrent = 202,
|
||||||
hipErrorMapFailed = 205,
|
hipErrorMapFailed = 205,
|
||||||
// Deprecated
|
// Deprecated
|
||||||
hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr.
|
hipErrorMapBufferObjectFailed = 205, // Produced when the IPC memory attach failed from ROCr.
|
||||||
hipErrorUnmapFailed = 206,
|
hipErrorUnmapFailed = 206,
|
||||||
hipErrorArrayIsMapped = 207,
|
hipErrorArrayIsMapped = 207,
|
||||||
hipErrorAlreadyMapped = 208,
|
hipErrorAlreadyMapped = 208,
|
||||||
@ -91,7 +94,7 @@ typedef enum __HIP_NODISCARD hipError_t {
|
|||||||
hipErrorUnsupportedLimit = 215,
|
hipErrorUnsupportedLimit = 215,
|
||||||
hipErrorContextAlreadyInUse = 216,
|
hipErrorContextAlreadyInUse = 216,
|
||||||
hipErrorPeerAccessUnsupported = 217,
|
hipErrorPeerAccessUnsupported = 217,
|
||||||
hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
|
hipErrorInvalidKernelFile = 218, // In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
|
||||||
hipErrorInvalidGraphicsContext = 219,
|
hipErrorInvalidGraphicsContext = 219,
|
||||||
hipErrorInvalidSource = 300,
|
hipErrorInvalidSource = 300,
|
||||||
hipErrorFileNotFound = 301,
|
hipErrorFileNotFound = 301,
|
||||||
@ -100,67 +103,48 @@ typedef enum __HIP_NODISCARD hipError_t {
|
|||||||
hipErrorOperatingSystem = 304,
|
hipErrorOperatingSystem = 304,
|
||||||
hipErrorInvalidHandle = 400,
|
hipErrorInvalidHandle = 400,
|
||||||
// Deprecated
|
// Deprecated
|
||||||
hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid.
|
hipErrorInvalidResourceHandle = 400, // Resource handle (hipEvent_t or hipStream_t) invalid.
|
||||||
hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation.
|
hipErrorIllegalState = 401, // Resource required is not in a valid state to perform operation.
|
||||||
hipErrorNotFound = 500,
|
hipErrorNotFound = 500,
|
||||||
hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not
|
hipErrorNotReady = 600, // Indicates that asynchronous operations enqueued earlier are not ready.
|
||||||
///< ready. This is not actually an error, but is used to distinguish
|
// This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion).
|
||||||
///< from hipSuccess (which indicates completion). APIs that return
|
// APIs that return this error include hipEventQuery and hipStreamQuery.
|
||||||
///< this error include hipEventQuery and hipStreamQuery.
|
|
||||||
hipErrorIllegalAddress = 700,
|
hipErrorIllegalAddress = 700,
|
||||||
hipErrorLaunchOutOfResources = 701, ///< Out of resources error.
|
hipErrorLaunchOutOfResources = 701, // Out of resources error.
|
||||||
hipErrorLaunchTimeOut = 702,
|
hipErrorLaunchTimeOut = 702,
|
||||||
hipErrorPeerAccessAlreadyEnabled =
|
hipErrorPeerAccessAlreadyEnabled = 704, // Peer access was already enabled from the current device.
|
||||||
704, ///< Peer access was already enabled from the current device.
|
hipErrorPeerAccessNotEnabled = 705, // Peer access was never enabled from the current device.
|
||||||
hipErrorPeerAccessNotEnabled =
|
|
||||||
705, ///< Peer access was never enabled from the current device.
|
|
||||||
hipErrorSetOnActiveProcess = 708,
|
hipErrorSetOnActiveProcess = 708,
|
||||||
hipErrorContextIsDestroyed = 709,
|
hipErrorContextIsDestroyed = 709,
|
||||||
hipErrorAssert = 710, ///< Produced when the kernel calls assert.
|
hipErrorAssert = 710, // Produced when the kernel calls assert.
|
||||||
hipErrorHostMemoryAlreadyRegistered =
|
hipErrorHostMemoryAlreadyRegistered = 712, // Produced when trying to lock a page-locked memory.
|
||||||
712, ///< Produced when trying to lock a page-locked memory.
|
hipErrorHostMemoryNotRegistered = 713, // Produced when trying to unlock a non-page-locked memory.
|
||||||
hipErrorHostMemoryNotRegistered =
|
hipErrorLaunchFailure = 719, // An exception occurred on the device while executing a kernel.
|
||||||
713, ///< Produced when trying to unlock a non-page-locked memory.
|
hipErrorCooperativeLaunchTooLarge = 720, // This error indicates that the number of blocks launched per grid for a kernel
|
||||||
hipErrorLaunchFailure =
|
// that was launched via cooperative launch APIs exceeds the maximum number of
|
||||||
719, ///< An exception occurred on the device while executing a kernel.
|
// allowed blocks for the current device
|
||||||
hipErrorCooperativeLaunchTooLarge =
|
hipErrorNotSupported = 801, // Produced when the hip API is not supported/implemented
|
||||||
720, ///< This error indicates that the number of blocks launched per grid for a kernel
|
hipErrorStreamCaptureUnsupported = 900, // The operation is not permitted when the stream is capturing.
|
||||||
///< that was launched via cooperative launch APIs exceeds the maximum number of
|
hipErrorStreamCaptureInvalidated = 901, // The current capture sequence on the stream
|
||||||
///< allowed blocks for the current device
|
// has been invalidated due to a previous error.
|
||||||
hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented
|
hipErrorStreamCaptureMerge = 902, // The operation would have resulted in a merge of two independent capture sequences.
|
||||||
hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream
|
hipErrorStreamCaptureUnmatched = 903, // The capture was not initiated in this stream.
|
||||||
///< is capturing.
|
hipErrorStreamCaptureUnjoined = 904, // The capture sequence contains a fork that was not joined to the primary stream.
|
||||||
hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream
|
hipErrorStreamCaptureIsolation = 905, // A dependency would have been created which crosses the capture sequence boundary.
|
||||||
///< has been invalidated due to a previous error.
|
// Only implicit in-stream ordering dependencies are allowed to cross the boundary
|
||||||
hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of
|
hipErrorStreamCaptureImplicit = 906, // The operation would have resulted in a disallowed implicit dependency on a current
|
||||||
///< two independent capture sequences.
|
// capture sequence from hipStreamLegacy.
|
||||||
hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream.
|
hipErrorCapturedEvent = 907, // The operation is not permitted on an event which was last recorded in a capturing stream.
|
||||||
hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not
|
hipErrorStreamCaptureWrongThread = 908, // A stream capture sequence not initiated with the hipStreamCaptureModeRelaxed argument to
|
||||||
///< joined to the primary stream.
|
// hipStreamBeginCapture was passed to hipStreamEndCapture in a different thread.
|
||||||
hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses
|
hipErrorGraphExecUpdateFailure = 910, // This error indicates that the graph update not performed because it included changes which
|
||||||
///< the capture sequence boundary. Only implicit
|
// violated constraints specific to instantiated graph update.
|
||||||
///< in-stream ordering dependencies are allowed
|
hipErrorUnknown = 999, // Unknown error.
|
||||||
///< to cross the boundary
|
|
||||||
hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed
|
|
||||||
///< implicit dependency on a current capture sequence
|
|
||||||
///< from hipStreamLegacy.
|
|
||||||
hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last
|
|
||||||
///< recorded in a capturing stream.
|
|
||||||
hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with
|
|
||||||
///< the hipStreamCaptureModeRelaxed argument to
|
|
||||||
///< hipStreamBeginCapture was passed to
|
|
||||||
///< hipStreamEndCapture in a different thread.
|
|
||||||
hipErrorGraphExecUpdateFailure = 910, ///< This error indicates that the graph update
|
|
||||||
///< not performed because it included changes which
|
|
||||||
///< violated constraints specific to instantiated graph
|
|
||||||
///< update.
|
|
||||||
hipErrorUnknown = 999, //< Unknown error.
|
|
||||||
// HSA Runtime Error Codes start here.
|
// HSA Runtime Error Codes start here.
|
||||||
hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen
|
hipErrorRuntimeMemory = 1052, // HSA runtime memory call returned error. Typically not seen in production systems.
|
||||||
///< in production systems.
|
hipErrorRuntimeOther = 1053, // HSA runtime call other than memory returned error. Typically not seen in production systems.
|
||||||
hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically
|
hipErrorTbd // Marker that more error codes are needed.
|
||||||
///< not seen in production systems.
|
|
||||||
hipErrorTbd ///< Marker that more error codes are needed.
|
|
||||||
} hipError_t;
|
} hipError_t;
|
||||||
|
|
||||||
#undef __HIP_NODISCARD
|
#undef __HIP_NODISCARD
|
||||||
@ -170,178 +154,178 @@ typedef enum __HIP_NODISCARD hipError_t {
|
|||||||
* hipDeviceAttribute_t
|
* hipDeviceAttribute_t
|
||||||
* hipDeviceAttributeUnused number: 5
|
* hipDeviceAttributeUnused number: 5
|
||||||
*/
|
*/
|
||||||
typedef enum hipDeviceAttribute_t {
|
typedef enum hipDeviceAttribute_t
|
||||||
|
{
|
||||||
hipDeviceAttributeCudaCompatibleBegin = 0,
|
hipDeviceAttributeCudaCompatibleBegin = 0,
|
||||||
|
|
||||||
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
|
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, // Whether ECC support is enabled.
|
||||||
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
|
hipDeviceAttributeAccessPolicyMaxWindowSize, // Cuda only. The maximum size of the window policy in bytes.
|
||||||
hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number.
|
hipDeviceAttributeAsyncEngineCount, // Asynchronous engines number.
|
||||||
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
|
hipDeviceAttributeCanMapHostMemory, // Whether host memory can be mapped into device address space
|
||||||
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Device can access host registered memory
|
hipDeviceAttributeCanUseHostPointerForRegisteredMem, // Device can access host registered memory
|
||||||
///< at the same virtual address as the CPU
|
// at the same virtual address as the CPU
|
||||||
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
|
hipDeviceAttributeClockRate, // Peak clock frequency in kilohertz.
|
||||||
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
|
hipDeviceAttributeComputeMode, // Compute mode that device is currently in.
|
||||||
hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption.
|
hipDeviceAttributeComputePreemptionSupported, // Device supports Compute Preemption.
|
||||||
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
|
hipDeviceAttributeConcurrentKernels, // Device can possibly execute multiple kernels concurrently.
|
||||||
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
|
hipDeviceAttributeConcurrentManagedAccess, // Device can coherently access managed memory concurrently with the CPU
|
||||||
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
|
hipDeviceAttributeCooperativeLaunch, // Support cooperative launch
|
||||||
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
|
hipDeviceAttributeCooperativeMultiDeviceLaunch, // Support cooperative launch on multiple devices
|
||||||
hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel.
|
hipDeviceAttributeDeviceOverlap, // Device can concurrently copy memory and execute a kernel.
|
||||||
///< Deprecated. Use instead asyncEngineCount.
|
// Deprecated. Use instead asyncEngineCount.
|
||||||
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
|
hipDeviceAttributeDirectManagedMemAccessFromHost, // Host can directly access managed memory on
|
||||||
///< the device without migration
|
// the device without migration
|
||||||
hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1
|
hipDeviceAttributeGlobalL1CacheSupported, // Device supports caching globals in L1
|
||||||
hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports native atomic operations
|
hipDeviceAttributeHostNativeAtomicSupported, // Link between the device and the host supports native atomic operations
|
||||||
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
|
hipDeviceAttributeIntegrated, // Device is integrated GPU
|
||||||
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
|
hipDeviceAttributeIsMultiGpuBoard, // Multiple GPU devices.
|
||||||
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
|
hipDeviceAttributeKernelExecTimeout, // Run time limit for kernels executed on the device
|
||||||
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
hipDeviceAttributeL2CacheSize, // Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
||||||
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
|
hipDeviceAttributeLocalL1CacheSupported, // caching locals in L1 is supported
|
||||||
hipDeviceAttributeLuid, ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
hipDeviceAttributeLuid, // 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
||||||
hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and non-Windows platforms
|
hipDeviceAttributeLuidDeviceNodeMask, // Luid device node mask. Undefined on TCC and non-Windows platforms
|
||||||
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
|
hipDeviceAttributeComputeCapabilityMajor, // Major compute capability version number.
|
||||||
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
|
hipDeviceAttributeManagedMemory, // Device supports allocating managed memory on this system
|
||||||
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor
|
hipDeviceAttributeMaxBlocksPerMultiProcessor, // Max block size per multiprocessor
|
||||||
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
|
hipDeviceAttributeMaxBlockDimX, // Max block size in width.
|
||||||
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
|
hipDeviceAttributeMaxBlockDimY, // Max block size in height.
|
||||||
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
|
hipDeviceAttributeMaxBlockDimZ, // Max block size in depth.
|
||||||
hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
|
hipDeviceAttributeMaxGridDimX, // Max grid size in width.
|
||||||
hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
|
hipDeviceAttributeMaxGridDimY, // Max grid size in height.
|
||||||
hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
|
hipDeviceAttributeMaxGridDimZ, // Max grid size in depth.
|
||||||
hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
|
hipDeviceAttributeMaxSurface1D, // Maximum size of 1D surface.
|
||||||
hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
|
hipDeviceAttributeMaxSurface1DLayered, // Cuda only. Maximum dimensions of 1D layered surface.
|
||||||
hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
|
hipDeviceAttributeMaxSurface2D, // Maximum dimension (width, height) of 2D surface.
|
||||||
hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
|
hipDeviceAttributeMaxSurface2DLayered, // Cuda only. Maximum dimensions of 2D layered surface.
|
||||||
hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
|
hipDeviceAttributeMaxSurface3D, // Maximum dimension (width, height, depth) of 3D surface.
|
||||||
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
|
hipDeviceAttributeMaxSurfaceCubemap, // Cuda only. Maximum dimensions of Cubemap surface.
|
||||||
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
|
hipDeviceAttributeMaxSurfaceCubemapLayered, // Cuda only. Maximum dimension of Cubemap layered surface.
|
||||||
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
|
hipDeviceAttributeMaxTexture1DWidth, // Maximum size of 1D texture.
|
||||||
hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture.
|
hipDeviceAttributeMaxTexture1DLayered, // Maximum dimensions of 1D layered texture.
|
||||||
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
|
hipDeviceAttributeMaxTexture1DLinear, // Maximum number of elements allocatable in a 1D linear texture.
|
||||||
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
|
// Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
|
||||||
hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture.
|
hipDeviceAttributeMaxTexture1DMipmap, // Maximum size of 1D mipmapped texture.
|
||||||
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
|
hipDeviceAttributeMaxTexture2DWidth, // Maximum dimension width of 2D texture.
|
||||||
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
|
hipDeviceAttributeMaxTexture2DHeight, // Maximum dimension hight of 2D texture.
|
||||||
hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations performed.
|
hipDeviceAttributeMaxTexture2DGather, // Maximum dimensions of 2D texture if gather operations performed.
|
||||||
hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture.
|
hipDeviceAttributeMaxTexture2DLayered, // Maximum dimensions of 2D layered texture.
|
||||||
hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
hipDeviceAttributeMaxTexture2DLinear, // Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
||||||
hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture.
|
hipDeviceAttributeMaxTexture2DMipmap, // Maximum dimensions of 2D mipmapped texture.
|
||||||
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
|
hipDeviceAttributeMaxTexture3DWidth, // Maximum dimension width of 3D texture.
|
||||||
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
|
hipDeviceAttributeMaxTexture3DHeight, // Maximum dimension height of 3D texture.
|
||||||
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
|
hipDeviceAttributeMaxTexture3DDepth, // Maximum dimension depth of 3D texture.
|
||||||
hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture.
|
hipDeviceAttributeMaxTexture3DAlt, // Maximum dimensions of alternate 3D texture.
|
||||||
hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture
|
hipDeviceAttributeMaxTextureCubemap, // Maximum dimensions of Cubemap texture
|
||||||
hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
|
hipDeviceAttributeMaxTextureCubemapLayered, // Maximum dimensions of Cubemap layered texture.
|
||||||
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
|
hipDeviceAttributeMaxThreadsDim, // Maximum dimension of a block
|
||||||
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
|
hipDeviceAttributeMaxThreadsPerBlock, // Maximum number of threads per block.
|
||||||
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
|
hipDeviceAttributeMaxThreadsPerMultiProcessor, // Maximum resident threads per multiprocessor.
|
||||||
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
|
hipDeviceAttributeMaxPitch, // Maximum pitch in bytes allowed by memory copies
|
||||||
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
|
hipDeviceAttributeMemoryBusWidth, // Global memory bus width in bits.
|
||||||
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
|
hipDeviceAttributeMemoryClockRate, // Peak memory clock frequency in kilohertz.
|
||||||
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
|
hipDeviceAttributeComputeCapabilityMinor, // Minor compute capability version number.
|
||||||
hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU board
|
hipDeviceAttributeMultiGpuBoardGroupID, // Unique ID of device group on the same multi-GPU board
|
||||||
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
|
hipDeviceAttributeMultiprocessorCount, // Number of multiprocessors on the device.
|
||||||
hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName
|
hipDeviceAttributeUnused1, // Previously hipDeviceAttributeName
|
||||||
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
|
hipDeviceAttributePageableMemoryAccess, // Device supports coherently accessing pageable memory
|
||||||
///< without calling hipHostRegister on it
|
// without calling hipHostRegister on it
|
||||||
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
|
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, // Device accesses pageable memory via the host's page tables
|
||||||
hipDeviceAttributePciBusId, ///< PCI Bus ID.
|
hipDeviceAttributePciBusId, // PCI Bus ID.
|
||||||
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
|
hipDeviceAttributePciDeviceId, // PCI Device ID.
|
||||||
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
|
hipDeviceAttributePciDomainID, // PCI Domain ID.
|
||||||
hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes
|
hipDeviceAttributePersistingL2CacheMaxSize, // Maximum l2 persisting lines capacity in bytes
|
||||||
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
|
hipDeviceAttributeMaxRegistersPerBlock, // 32-bit registers available to a thread block. This number is shared
|
||||||
///< by all thread blocks simultaneously resident on a multiprocessor.
|
// by all thread blocks simultaneously resident on a multiprocessor.
|
||||||
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
|
hipDeviceAttributeMaxRegistersPerMultiprocessor, // 32-bit registers available per block.
|
||||||
hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per block.
|
hipDeviceAttributeReservedSharedMemPerBlock, // Shared memory reserved by CUDA driver per block.
|
||||||
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
|
hipDeviceAttributeMaxSharedMemoryPerBlock, // Maximum shared memory available per block in bytes.
|
||||||
hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special opt in.
|
hipDeviceAttributeSharedMemPerBlockOptin, // Maximum shared memory per block usable by special opt in.
|
||||||
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor.
|
hipDeviceAttributeSharedMemPerMultiprocessor, // Shared memory available per multiprocessor.
|
||||||
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
|
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, // Cuda only. Performance ratio of single precision to double precision.
|
||||||
hipDeviceAttributeStreamPrioritiesSupported, ///< Whether to support stream priorities.
|
hipDeviceAttributeStreamPrioritiesSupported, // Whether to support stream priorities.
|
||||||
hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces
|
hipDeviceAttributeSurfaceAlignment, // Alignment requirement for surfaces
|
||||||
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
|
hipDeviceAttributeTccDriver, // Cuda only. Whether device is a Tesla device using TCC driver
|
||||||
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
|
hipDeviceAttributeTextureAlignment, // Alignment requirement for textures
|
||||||
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
|
hipDeviceAttributeTexturePitchAlignment, // Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||||
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
|
hipDeviceAttributeTotalConstantMemory, // Constant memory size in bytes.
|
||||||
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
|
hipDeviceAttributeTotalGlobalMem, // Global memory available on devicice.
|
||||||
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
|
hipDeviceAttributeUnifiedAddressing, // Cuda only. An unified address space shared with the host.
|
||||||
hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid
|
hipDeviceAttributeUnused2, // Previously hipDeviceAttributeUuid
|
||||||
hipDeviceAttributeWarpSize, ///< Warp size in threads.
|
hipDeviceAttributeWarpSize, // Warp size in threads.
|
||||||
hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator
|
hipDeviceAttributeMemoryPoolsSupported, // Device supports HIP Stream Ordered Memory Allocator
|
||||||
hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
|
hipDeviceAttributeVirtualMemoryManagementSupported, // Device supports HIP virtual memory management
|
||||||
hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via hipHostRegister
|
hipDeviceAttributeHostRegisterSupported, // Can device support host memory registration via hipHostRegister
|
||||||
hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream Ordered Memory Allocator
|
hipDeviceAttributeMemoryPoolSupportedHandleTypes, // Supported handle mask for HIP Stream Ordered Memory Allocator
|
||||||
|
|
||||||
hipDeviceAttributeCudaCompatibleEnd = 9999,
|
hipDeviceAttributeCudaCompatibleEnd = 9999,
|
||||||
hipDeviceAttributeAmdSpecificBegin = 10000,
|
hipDeviceAttributeAmdSpecificBegin = 10000,
|
||||||
|
|
||||||
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
|
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, // Frequency in khz of the timer used by the device-side "clock*"
|
||||||
hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch
|
hipDeviceAttributeUnused3, // Previously hipDeviceAttributeArch
|
||||||
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
|
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, // Maximum Shared Memory PerMultiprocessor.
|
||||||
hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch
|
hipDeviceAttributeUnused4, // Previously hipDeviceAttributeGcnArch
|
||||||
hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName
|
hipDeviceAttributeUnused5, // Previously hipDeviceAttributeGcnArchName
|
||||||
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
hipDeviceAttributeHdpMemFlushCntl, // Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||||
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
hipDeviceAttributeHdpRegFlushCntl, // Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, // Supports cooperative launch on multiple devices with unmatched functions
|
||||||
///< devices with unmatched functions
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, // Supports cooperative launch on multiple devices with unmatched grid dimensions
|
||||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, // Supports cooperative launch on multiple devices with unmatched block dimensions
|
||||||
///< devices with unmatched grid dimensions
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, // Supports cooperative launch on multiple devices with unmatched shared memories
|
||||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
|
hipDeviceAttributeIsLargeBar, // Whether it is LargeBar
|
||||||
///< devices with unmatched block dimensions
|
hipDeviceAttributeAsicRevision, // Revision of the GPU in this device
|
||||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
|
hipDeviceAttributeCanUseStreamWaitValue, // '1' if Device supports hipStreamWaitValue32() and hipStreamWaitValue64(), '0' otherwise.
|
||||||
///< devices with unmatched shared memories
|
hipDeviceAttributeImageSupport, // '1' if Device supports image, '0' otherwise.
|
||||||
hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
|
hipDeviceAttributePhysicalMultiProcessorCount, // All available physical compute units for the device
|
||||||
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
|
hipDeviceAttributeFineGrainSupport, // '1' if Device supports fine grain, '0' otherwise
|
||||||
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
|
hipDeviceAttributeWallClockRate, // Constant frequency of wall clock in kilohertz.
|
||||||
///< hipStreamWaitValue64(), '0' otherwise.
|
|
||||||
hipDeviceAttributeImageSupport, ///< '1' if Device supports image, '0' otherwise.
|
|
||||||
hipDeviceAttributePhysicalMultiProcessorCount, ///< All available physical compute
|
|
||||||
///< units for the device
|
|
||||||
hipDeviceAttributeFineGrainSupport, ///< '1' if Device supports fine grain, '0' otherwise
|
|
||||||
hipDeviceAttributeWallClockRate, ///< Constant frequency of wall clock in kilohertz.
|
|
||||||
|
|
||||||
hipDeviceAttributeAmdSpecificEnd = 19999,
|
hipDeviceAttributeAmdSpecificEnd = 19999,
|
||||||
hipDeviceAttributeVendorSpecificBegin = 20000,
|
hipDeviceAttributeVendorSpecificBegin = 20000,
|
||||||
// Extended attributes for vendors
|
// Extended attributes for vendors
|
||||||
|
|
||||||
} hipDeviceAttribute_t;
|
} hipDeviceAttribute_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* hipDeviceArch_t
|
* hipDeviceArch_t
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct
|
||||||
|
{
|
||||||
// 32-bit Atomics
|
// 32-bit Atomics
|
||||||
unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
|
unsigned hasGlobalInt32Atomics : 1; // 32-bit integer atomics for global memory.
|
||||||
unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
|
unsigned hasGlobalFloatAtomicExch : 1; // 32-bit float atomic exch for global memory.
|
||||||
unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
|
unsigned hasSharedInt32Atomics : 1; // 32-bit integer atomics for shared memory.
|
||||||
unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
|
unsigned hasSharedFloatAtomicExch : 1; // 32-bit float atomic exch for shared memory.
|
||||||
unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
|
unsigned hasFloatAtomicAdd : 1; // 32-bit float atomic add in global and shared memory.
|
||||||
|
|
||||||
// 64-bit Atomics
|
// 64-bit Atomics
|
||||||
unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
|
unsigned hasGlobalInt64Atomics : 1; // 64-bit integer atomics for global memory.
|
||||||
unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
|
unsigned hasSharedInt64Atomics : 1; // 64-bit integer atomics for shared memory.
|
||||||
|
|
||||||
// Doubles
|
// Doubles
|
||||||
unsigned hasDoubles : 1; ///< Double-precision floating point.
|
unsigned hasDoubles : 1; // Double-precision floating point.
|
||||||
|
|
||||||
// Warp cross-lane operations
|
// Warp cross-lane operations
|
||||||
unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
|
unsigned hasWarpVote : 1; // Warp vote instructions (__any, __all).
|
||||||
unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
|
unsigned hasWarpBallot : 1; // Warp ballot instructions (__ballot).
|
||||||
unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
|
unsigned hasWarpShuffle : 1; // Warp shuffle operations. (__shfl_*).
|
||||||
unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
|
unsigned hasFunnelShift : 1; // Funnel two words into one with shift&mask caps.
|
||||||
|
|
||||||
// Sync
|
// Sync
|
||||||
unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
|
unsigned hasThreadFenceSystem : 1; // __threadfence_system.
|
||||||
unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or.
|
unsigned hasSyncThreadsExt : 1; // __syncthreads_count, syncthreads_and, syncthreads_or.
|
||||||
|
|
||||||
// Misc
|
// Misc
|
||||||
unsigned hasSurfaceFuncs : 1; ///< Surface functions.
|
unsigned hasSurfaceFuncs : 1; // Surface functions.
|
||||||
unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
|
unsigned has3dGrid : 1; // Grid and group dims are 3D (rather than 2D).
|
||||||
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
|
unsigned hasDynamicParallelism : 1; // Dynamic parallelism.
|
||||||
|
|
||||||
} hipDeviceArch_t;
|
} hipDeviceArch_t;
|
||||||
|
|
||||||
typedef struct hipUUID_t {
|
typedef struct hipUUID_t
|
||||||
|
{
|
||||||
char bytes[16];
|
char bytes[16];
|
||||||
|
|
||||||
} hipUUID;
|
} hipUUID;
|
||||||
|
|
||||||
|
|
||||||
@ -349,144 +333,122 @@ typedef struct hipUUID_t {
|
|||||||
* hipDeviceProp
|
* hipDeviceProp
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
typedef struct hipDeviceProp_t {
|
typedef struct hipDeviceProp_t
|
||||||
char name[256]; ///< Device name.
|
{
|
||||||
hipUUID uuid; ///< UUID of a device
|
char name[256]; // Device name.
|
||||||
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
|
hipUUID uuid; // UUID of a device
|
||||||
unsigned int luidDeviceNodeMask; ///< LUID node mask
|
char luid[8]; // 8-byte unique identifier. Only valid on windows
|
||||||
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
unsigned int luidDeviceNodeMask; // LUID node mask
|
||||||
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
|
size_t totalGlobalMem; // Size of global memory region (in bytes).
|
||||||
int regsPerBlock; ///< Registers per block.
|
size_t sharedMemPerBlock; // Size of shared memory per block (in bytes).
|
||||||
int warpSize; ///< Warp size.
|
int regsPerBlock; // Registers per block.
|
||||||
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
|
int warpSize; // Warp size.
|
||||||
///< pitched memory
|
size_t memPitch; // Maximum pitch in bytes allowed by memory copies pitched memory
|
||||||
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
int maxThreadsPerBlock; // Max work items per work group or workgroup max size.
|
||||||
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
int maxThreadsDim[3]; // Max number of threads in each dimension (XYZ) of a block.
|
||||||
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
int maxGridSize[3]; // Max grid dimensions (XYZ).
|
||||||
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
int clockRate; // Max clock frequency of the multiProcessors in khz.
|
||||||
size_t totalConstMem; ///< Size of shared constant memory region on the device
|
size_t totalConstMem; // Size of shared constant memory region on the device (in bytes).
|
||||||
///< (in bytes).
|
int major; // Major compute capability. On HCC, this is an approximation and features may
|
||||||
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
// differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
int minor; // Minor compute capability. On HCC, this is an approximation and features may
|
||||||
///< feature caps.
|
// differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||||
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
|
size_t textureAlignment; // Alignment requirement for textures
|
||||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
size_t texturePitchAlignment; // Pitch alignment requirement for texture references bound to
|
||||||
///< feature caps.
|
int deviceOverlap; // Deprecated. Use asyncEngineCount instead
|
||||||
size_t textureAlignment; ///< Alignment requirement for textures
|
int multiProcessorCount; // Number of multi-processors (compute units).
|
||||||
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
|
int kernelExecTimeoutEnabled; // Run time limit for kernels executed on the device
|
||||||
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead
|
int integrated; // APU vs dGPU
|
||||||
int multiProcessorCount; ///< Number of multi-processors (compute units).
|
int canMapHostMemory; // Check whether HIP can map host memory
|
||||||
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
|
int computeMode; // Compute mode.
|
||||||
int integrated; ///< APU vs dGPU
|
int maxTexture1D; // Maximum number of elements in 1D images
|
||||||
int canMapHostMemory; ///< Check whether HIP can map host memory
|
int maxTexture1DMipmap; // Maximum 1D mipmap texture size
|
||||||
int computeMode; ///< Compute mode.
|
int maxTexture1DLinear; // Maximum size for 1D textures bound to linear memory
|
||||||
int maxTexture1D; ///< Maximum number of elements in 1D images
|
int maxTexture2D[2]; // Maximum dimensions (width, height) of 2D images, in image elements
|
||||||
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size
|
int maxTexture2DMipmap[2]; // Maximum number of elements in 2D array mipmap of images
|
||||||
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
|
int maxTexture2DLinear[3]; // Maximum 2D tex dimensions if tex are bound to pitched memory
|
||||||
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
|
int maxTexture2DGather[2]; // Maximum 2D tex dimensions if gather has to be performed
|
||||||
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images
|
int maxTexture3D[3]; // Maximum dimensions (width, height, depth) of 3D images, in image elements
|
||||||
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory
|
int maxTexture3DAlt[3]; // Maximum alternate 3D texture dims
|
||||||
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed
|
int maxTextureCubemap; // Maximum cubemap texture dims
|
||||||
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
|
int maxTexture1DLayered[2]; // Maximum number of elements in 1D array images
|
||||||
///< elements
|
int maxTexture2DLayered[3]; // Maximum number of elements in 2D array images
|
||||||
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims
|
int maxTextureCubemapLayered[2]; // Maximum cubemaps layered texture dims
|
||||||
int maxTextureCubemap; ///< Maximum cubemap texture dims
|
int maxSurface1D; // Maximum 1D surface size
|
||||||
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images
|
int maxSurface2D[2]; // Maximum 2D surface size
|
||||||
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images
|
int maxSurface3D[3]; // Maximum 3D surface size
|
||||||
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims
|
int maxSurface1DLayered[2]; // Maximum 1D layered surface size
|
||||||
int maxSurface1D; ///< Maximum 1D surface size
|
int maxSurface2DLayered[3]; // Maximum 2D layared surface size
|
||||||
int maxSurface2D[2]; ///< Maximum 2D surface size
|
int maxSurfaceCubemap; // Maximum cubemap surface size
|
||||||
int maxSurface3D[3]; ///< Maximum 3D surface size
|
int maxSurfaceCubemapLayered[2]; // Maximum cubemap layered surface size
|
||||||
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size
|
size_t surfaceAlignment; // Alignment requirement for surface
|
||||||
int maxSurface2DLayered[3]; ///< Maximum 2D layared surface size
|
int concurrentKernels; // Device can possibly execute multiple kernels concurrently.
|
||||||
int maxSurfaceCubemap; ///< Maximum cubemap surface size
|
int ECCEnabled; // Device has ECC support enabled
|
||||||
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size
|
int pciBusID; // PCI Bus ID.
|
||||||
size_t surfaceAlignment; ///< Alignment requirement for surface
|
int pciDeviceID; // PCI Device ID.
|
||||||
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
int pciDomainID; // PCI Domain ID
|
||||||
int ECCEnabled; ///< Device has ECC support enabled
|
int tccDriver; // 1:If device is Tesla device using TCC driver, else 0
|
||||||
int pciBusID; ///< PCI Bus ID.
|
int asyncEngineCount; // Number of async engines
|
||||||
int pciDeviceID; ///< PCI Device ID.
|
int unifiedAddressing; // Does device and host share unified address space
|
||||||
int pciDomainID; ///< PCI Domain ID
|
int memoryClockRate; // Max global memory clock frequency in khz.
|
||||||
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
int memoryBusWidth; // Global memory bus width in bits.
|
||||||
int asyncEngineCount; ///< Number of async engines
|
int l2CacheSize; // L2 cache size.
|
||||||
int unifiedAddressing; ///< Does device and host share unified address space
|
int persistingL2CacheMaxSize; // Device's max L2 persisting lines in bytes
|
||||||
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
int maxThreadsPerMultiProcessor; // Maximum resident threads per multi-processor.
|
||||||
int memoryBusWidth; ///< Global memory bus width in bits.
|
int streamPrioritiesSupported; // Device supports stream priority
|
||||||
int l2CacheSize; ///< L2 cache size.
|
int globalL1CacheSupported; // Indicates globals are cached in L1
|
||||||
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes
|
int localL1CacheSupported; // Locals are cahced in L1
|
||||||
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
|
size_t sharedMemPerMultiprocessor; // Amount of shared memory available per multiprocessor.
|
||||||
int streamPrioritiesSupported; ///< Device supports stream priority
|
int regsPerMultiprocessor; // registers available per multiprocessor
|
||||||
int globalL1CacheSupported; ///< Indicates globals are cached in L1
|
int managedMemory; // Device supports allocating managed memory on this system
|
||||||
int localL1CacheSupported; ///< Locals are cahced in L1
|
int isMultiGpuBoard; // 1 if device is on a multi-GPU board, 0 if not.
|
||||||
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor.
|
int multiGpuBoardGroupID; // Unique identifier for a group of devices on same multiboard GPU
|
||||||
int regsPerMultiprocessor; ///< registers available per multiprocessor
|
int hostNativeAtomicSupported; // Link between host and device supports native atomics
|
||||||
int managedMemory; ///< Device supports allocating managed memory on this system
|
int singleToDoublePrecisionPerfRatio; // Deprecated. CUDA only.
|
||||||
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
|
int pageableMemoryAccess; // Device supports coherently accessing pageable memory
|
||||||
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU
|
// without calling hipHostRegister on it
|
||||||
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics
|
int concurrentManagedAccess; // Device can coherently access managed memory concurrently with the CPU
|
||||||
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
|
int computePreemptionSupported; // Is compute preemption supported on the device
|
||||||
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
|
int canUseHostPointerForRegisteredMem; // Device can access host registered memory with same address as the host
|
||||||
///< without calling hipHostRegister on it
|
int cooperativeLaunch; // HIP device supports cooperative launch
|
||||||
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
|
int cooperativeMultiDeviceLaunch; // HIP device supports cooperative launch on multiple devices
|
||||||
///< the CPU
|
size_t sharedMemPerBlockOptin; // Per device m ax shared mem per block usable by special opt in
|
||||||
int computePreemptionSupported; ///< Is compute preemption supported on the device
|
int pageableMemoryAccessUsesHostPageTables; // Device accesses pageable memory via the host's page tables
|
||||||
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same
|
int directManagedMemAccessFromHost; // Host can directly access managed memory on the device without migration
|
||||||
///< address as the host
|
int maxBlocksPerMultiProcessor; // Max number of blocks on CU
|
||||||
int cooperativeLaunch; ///< HIP device supports cooperative launch
|
int accessPolicyMaxWindowSize; // Max value of access policy window
|
||||||
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
|
size_t reservedSharedMemPerBlock; // Shared memory reserved by driver per block
|
||||||
///< devices
|
int hostRegisterSupported; // Device supports hipHostRegister
|
||||||
size_t
|
int sparseHipArraySupported; // Indicates if device supports sparse hip arrays
|
||||||
sharedMemPerBlockOptin; ///< Per device m ax shared mem per block usable by special opt in
|
int hostRegisterReadOnlySupported; // Device supports using the hipHostRegisterReadOnly flag with hipHostRegistger
|
||||||
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
|
int timelineSemaphoreInteropSupported; // Indicates external timeline semaphore support
|
||||||
///< page tables
|
int memoryPoolsSupported; // Indicates if device supports hipMallocAsync and hipMemPool APIs
|
||||||
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
|
int gpuDirectRDMASupported; // Indicates device support of RDMA APIs
|
||||||
///< without migration
|
unsigned int gpuDirectRDMAFlushWritesOptions; // Bitmask to be interpreted according to hipFlushGPUDirectRDMAWritesOptions
|
||||||
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU
|
int gpuDirectRDMAWritesOrdering; // value of hipGPUDirectRDMAWritesOrdering
|
||||||
int accessPolicyMaxWindowSize; ///< Max value of access policy window
|
unsigned int memoryPoolSupportedHandleTypes; // Bitmask of handle types support with mempool based IPC
|
||||||
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block
|
int deferredMappingHipArraySupported; // Device supports deferred mapping HIP arrays and HIP mipmapped arrays
|
||||||
int hostRegisterSupported; ///< Device supports hipHostRegister
|
int ipcEventSupported; // Device supports IPC events
|
||||||
int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays
|
int clusterLaunch; // Device supports cluster launch
|
||||||
int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag
|
int unifiedFunctionPointers; // Indicates device supports unified function pointers
|
||||||
///< with hipHostRegistger
|
int reserved[63]; // CUDA Reserved.
|
||||||
int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support
|
|
||||||
int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
|
|
||||||
int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs
|
|
||||||
unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to
|
|
||||||
///< hipFlushGPUDirectRDMAWritesOptions
|
|
||||||
int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering
|
|
||||||
unsigned int
|
|
||||||
memoryPoolSupportedHandleTypes; ///< Bitmask of handle types support with mempool based IPC
|
|
||||||
int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP
|
|
||||||
///< mipmapped arrays
|
|
||||||
int ipcEventSupported; ///< Device supports IPC events
|
|
||||||
int clusterLaunch; ///< Device supports cluster launch
|
|
||||||
int unifiedFunctionPointers; ///< Indicates device supports unified function pointers
|
|
||||||
int reserved[63]; ///< CUDA Reserved.
|
|
||||||
|
|
||||||
int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA.
|
int hipReserved[32]; // Reserved for adding new entries for HIP/CUDA.
|
||||||
|
|
||||||
/* HIP Only struct members */
|
/* HIP Only struct members */
|
||||||
char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only.
|
char gcnArchName[256]; // AMD GCN Arch Name. HIP Only.
|
||||||
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only.
|
size_t maxSharedMemoryPerMultiProcessor; // Maximum Shared Memory Per CU. HIP Only.
|
||||||
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
|
int clockInstructionRate; // Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
|
||||||
///< instructions. New for HIP.
|
hipDeviceArch_t arch; // Architectural feature flags. New for HIP.
|
||||||
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
unsigned int* hdpMemFlushCntl; // Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||||
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
unsigned int* hdpRegFlushCntl; // Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||||
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
|
int cooperativeMultiDeviceUnmatchedFunc; // HIP device supports cooperative launch on multiple devices with unmatched functions
|
||||||
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
|
int cooperativeMultiDeviceUnmatchedGridDim; // HIP device supports cooperative launch on multiple devices with unmatched grid dimensions
|
||||||
///< multiple
|
int cooperativeMultiDeviceUnmatchedBlockDim; // HIP device supports cooperative launch on multiple devices with unmatched block dimensions
|
||||||
/// devices with unmatched functions
|
int cooperativeMultiDeviceUnmatchedSharedMem; // HIP device supports cooperative launch on multiple devices with unmatched shared memories
|
||||||
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
|
int isLargeBar; // 1: if it is a large PCI bar device, else 0
|
||||||
///< multiple
|
int asicRevision; // Revision of the GPU in this device
|
||||||
/// devices with unmatched grid dimensions
|
|
||||||
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
|
|
||||||
///< multiple
|
|
||||||
/// devices with unmatched block dimensions
|
|
||||||
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
|
|
||||||
///< multiple
|
|
||||||
/// devices with unmatched shared memories
|
|
||||||
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
|
|
||||||
int asicRevision; ///< Revision of the GPU in this device
|
|
||||||
} hipDeviceProp_t;
|
} hipDeviceProp_t;
|
||||||
|
|
||||||
//Flags that can be used with hipStreamCreateWithFlags.
|
//Flags that can be used with hipStreamCreateWithFlags.
|
||||||
@ -532,7 +494,8 @@ typedef struct hipDeviceProp_t {
|
|||||||
#define hipDeviceMapHost 0x8
|
#define hipDeviceMapHost 0x8
|
||||||
#define hipDeviceLmemResizeToMax 0x16
|
#define hipDeviceLmemResizeToMax 0x16
|
||||||
|
|
||||||
typedef enum hipJitOption {
|
typedef enum hipJitOption
|
||||||
|
{
|
||||||
hipJitOptionMaxRegisters = 0,
|
hipJitOptionMaxRegisters = 0,
|
||||||
hipJitOptionThreadsPerBlock,
|
hipJitOptionThreadsPerBlock,
|
||||||
hipJitOptionWallTime,
|
hipJitOptionWallTime,
|
||||||
@ -551,6 +514,7 @@ typedef enum hipJitOption {
|
|||||||
hipJitOptionSm3xOpt,
|
hipJitOptionSm3xOpt,
|
||||||
hipJitOptionFastCompile,
|
hipJitOptionFastCompile,
|
||||||
hipJitOptionNumOptions
|
hipJitOptionNumOptions
|
||||||
|
|
||||||
} hipJitOption;
|
} hipJitOption;
|
||||||
|
|
||||||
// stop: hip_runtime_api.h
|
// stop: hip_runtime_api.h
|
||||||
@ -563,11 +527,17 @@ typedef enum hipJitOption {
|
|||||||
|
|
||||||
#define HIP_API_CALL HIPAPI
|
#define HIP_API_CALL HIPAPI
|
||||||
|
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE) (hipCtx_t *, unsigned int, hipDevice_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE) (hipCtx_t *, unsigned int, hipDevice_t);
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY) (hipCtx_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY) (hipCtx_t);
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (hipCtx_t *);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (hipCtx_t *);
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (hipCtx_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (hipCtx_t);
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (hipCtx_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (hipCtx_t);
|
||||||
|
// deprecated
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (void);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (void);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, hipDeviceAttribute_t, hipDevice_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, hipDeviceAttribute_t, hipDevice_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *);
|
||||||
@ -575,7 +545,8 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET) (hipDevice_t *,
|
|||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, hipDevice_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, hipDevice_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, hipDevice_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, hipDevice_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *, unsigned int);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *);
|
||||||
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATEWITHFLAGS) (hipEvent_t *, unsigned int);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY) (hipEvent_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY) (hipEvent_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, hipEvent_t, hipEvent_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, hipEvent_t, hipEvent_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD) (hipEvent_t, hipStream_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD) (hipEvent_t, hipStream_t);
|
||||||
@ -603,7 +574,10 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (hipDeviceptr_t
|
|||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (hipModule_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (hipModule_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION) (int *);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION) (int *);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *, unsigned int);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICE) (hipDevice_t);
|
||||||
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICEFLAGS) (unsigned int);
|
||||||
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *);
|
||||||
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATEWITHFLAGS) (hipStream_t *, unsigned int);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t);
|
||||||
typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES) (hipDeviceProp_t *, hipDevice_t);
|
typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES) (hipDeviceProp_t *, hipDevice_t);
|
||||||
@ -613,11 +587,17 @@ typedef struct hc_hip_lib
|
|||||||
{
|
{
|
||||||
hc_dynlib_t lib;
|
hc_dynlib_t lib;
|
||||||
|
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXCREATE hipCtxCreate;
|
HIP_HIPCTXCREATE hipCtxCreate;
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXDESTROY hipCtxDestroy;
|
HIP_HIPCTXDESTROY hipCtxDestroy;
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXPOPCURRENT hipCtxPopCurrent;
|
HIP_HIPCTXPOPCURRENT hipCtxPopCurrent;
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent;
|
HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent;
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXSETCURRENT hipCtxSetCurrent;
|
HIP_HIPCTXSETCURRENT hipCtxSetCurrent;
|
||||||
|
// deprecated
|
||||||
HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize;
|
HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize;
|
||||||
HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute;
|
HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute;
|
||||||
HIP_HIPDEVICEGETCOUNT hipDeviceGetCount;
|
HIP_HIPDEVICEGETCOUNT hipDeviceGetCount;
|
||||||
@ -626,6 +606,7 @@ typedef struct hc_hip_lib
|
|||||||
HIP_HIPDEVICETOTALMEM hipDeviceTotalMem;
|
HIP_HIPDEVICETOTALMEM hipDeviceTotalMem;
|
||||||
HIP_HIPDRIVERGETVERSION hipDriverGetVersion;
|
HIP_HIPDRIVERGETVERSION hipDriverGetVersion;
|
||||||
HIP_HIPEVENTCREATE hipEventCreate;
|
HIP_HIPEVENTCREATE hipEventCreate;
|
||||||
|
HIP_HIPEVENTCREATEWITHFLAGS hipEventCreateWithFlags;
|
||||||
HIP_HIPEVENTDESTROY hipEventDestroy;
|
HIP_HIPEVENTDESTROY hipEventDestroy;
|
||||||
HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime;
|
HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime;
|
||||||
HIP_HIPEVENTRECORD hipEventRecord;
|
HIP_HIPEVENTRECORD hipEventRecord;
|
||||||
@ -653,7 +634,10 @@ typedef struct hc_hip_lib
|
|||||||
HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx;
|
HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx;
|
||||||
HIP_HIPMODULEUNLOAD hipModuleUnload;
|
HIP_HIPMODULEUNLOAD hipModuleUnload;
|
||||||
HIP_HIPRUNTIMEGETVERSION hipRuntimeGetVersion;
|
HIP_HIPRUNTIMEGETVERSION hipRuntimeGetVersion;
|
||||||
|
HIP_HIPSETDEVICE hipSetDevice;
|
||||||
|
HIP_HIPSETDEVICEFLAGS hipSetDeviceFlags;
|
||||||
HIP_HIPSTREAMCREATE hipStreamCreate;
|
HIP_HIPSTREAMCREATE hipStreamCreate;
|
||||||
|
HIP_HIPSTREAMCREATEWITHFLAGS hipStreamCreateWithFlags;
|
||||||
HIP_HIPSTREAMDESTROY hipStreamDestroy;
|
HIP_HIPSTREAMDESTROY hipStreamDestroy;
|
||||||
HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize;
|
HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize;
|
||||||
HIP_HIPGETDEVICEPROPERTIES hipGetDeviceProperties;
|
HIP_HIPGETDEVICEPROPERTIES hipGetDeviceProperties;
|
||||||
@ -666,11 +650,17 @@ typedef hc_hip_lib_t HIP_PTR;
|
|||||||
int hip_init (void *hashcat_ctx);
|
int hip_init (void *hashcat_ctx);
|
||||||
void hip_close (void *hashcat_ctx);
|
void hip_close (void *hashcat_ctx);
|
||||||
|
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxCreate (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
|
int hc_hipCtxCreate (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxDestroy (void *hashcat_ctx, hipCtx_t ctx);
|
int hc_hipCtxDestroy (void *hashcat_ctx, hipCtx_t ctx);
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxPopCurrent (void *hashcat_ctx, hipCtx_t *pctx);
|
int hc_hipCtxPopCurrent (void *hashcat_ctx, hipCtx_t *pctx);
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxPushCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
int hc_hipCtxPushCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxSetCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
int hc_hipCtxSetCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
||||||
|
// deprecated
|
||||||
int hc_hipCtxSynchronize (void *hashcat_ctx);
|
int hc_hipCtxSynchronize (void *hashcat_ctx);
|
||||||
int hc_hipDeviceGet (void *hashcat_ctx, hipDevice_t *device, int ordinal);
|
int hc_hipDeviceGet (void *hashcat_ctx, hipDevice_t *device, int ordinal);
|
||||||
int hc_hipDeviceGetAttribute (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
int hc_hipDeviceGetAttribute (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
||||||
@ -678,7 +668,8 @@ int hc_hipDeviceGetCount (void *hashcat_ctx, int *count);
|
|||||||
int hc_hipDeviceGetName (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
|
int hc_hipDeviceGetName (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
|
||||||
int hc_hipDeviceTotalMem (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
|
int hc_hipDeviceTotalMem (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
|
||||||
int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion);
|
int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion);
|
||||||
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
|
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent);
|
||||||
|
int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
|
||||||
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent);
|
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent);
|
||||||
int hc_hipEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
|
int hc_hipEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
|
||||||
int hc_hipEventQuery (void *hashcat_ctx, hipEvent_t hEvent);
|
int hc_hipEventQuery (void *hashcat_ctx, hipEvent_t hEvent);
|
||||||
@ -705,7 +696,10 @@ int hc_hipModuleGetGlobal (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t
|
|||||||
int hc_hipModuleLoadDataEx (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
|
int hc_hipModuleLoadDataEx (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
|
||||||
int hc_hipModuleUnload (void *hashcat_ctx, hipModule_t hmod);
|
int hc_hipModuleUnload (void *hashcat_ctx, hipModule_t hmod);
|
||||||
int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion);
|
int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion);
|
||||||
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
|
int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev);
|
||||||
|
int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags);
|
||||||
|
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream);
|
||||||
|
int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int flags);
|
||||||
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream);
|
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream);
|
||||||
int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream);
|
int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream);
|
||||||
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
|
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
|
||||||
|
@ -679,7 +679,7 @@ HC_API_CALL void *thread_autotune (void *p)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for autotune failure
|
// check for autotune failure
|
||||||
@ -695,11 +695,6 @@ HC_API_CALL void *thread_autotune (void *p)
|
|||||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
366
src/backend.c
366
src/backend.c
@ -993,7 +993,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
|
if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
|
||||||
|
|
||||||
@ -1059,11 +1059,6 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
|
|||||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
|
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1082,13 +1077,11 @@ int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
|
if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
|
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined (__APPLE__)
|
#if defined (__APPLE__)
|
||||||
@ -1122,13 +1115,11 @@ int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
|
if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
|
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
|
||||||
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined (__APPLE__)
|
#if defined (__APPLE__)
|
||||||
@ -5937,24 +5928,24 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
|
|||||||
device_param->has_prmt = (sm >= 20) ? true : false;
|
device_param->has_prmt = (sm >= 20) ? true : false;
|
||||||
device_param->has_shfw = (sm >= 70) ? true : false;
|
device_param->has_shfw = (sm >= 70) ? true : false;
|
||||||
|
|
||||||
|
// one-time init cuda context
|
||||||
|
|
||||||
|
if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// device_available_mem
|
// device_available_mem
|
||||||
|
|
||||||
CUcontext cuda_context;
|
|
||||||
|
|
||||||
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t free = 0;
|
size_t free = 0;
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
|
|
||||||
@ -5967,14 +5958,7 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
|
|||||||
|
|
||||||
device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
|
device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
|
||||||
|
|
||||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1)
|
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1)
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1)
|
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -6440,24 +6424,24 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
|
|||||||
device_param->has_prmt = false;
|
device_param->has_prmt = false;
|
||||||
device_param->has_shfw = true; // always reports false : prop.arch.hasFunnelShift;
|
device_param->has_shfw = true; // always reports false : prop.arch.hasFunnelShift;
|
||||||
|
|
||||||
|
// one-time init hip context
|
||||||
|
|
||||||
|
if (hc_hipSetDeviceFlags (hashcat_ctx, hipDeviceScheduleBlockingSync) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// device_available_mem
|
// device_available_mem
|
||||||
|
|
||||||
hipCtx_t hip_context;
|
|
||||||
|
|
||||||
if (hc_hipCtxCreate (hashcat_ctx, &hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, hip_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t free = 0;
|
size_t free = 0;
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
|
|
||||||
@ -6470,20 +6454,6 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
|
|||||||
|
|
||||||
device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
|
device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
|
||||||
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &hip_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined (__linux__)
|
#if defined (__linux__)
|
||||||
if (strchr (folder_config->cpath_real, ' ') != NULL)
|
if (strchr (folder_config->cpath_real, ' ') != NULL)
|
||||||
{
|
{
|
||||||
@ -8623,13 +8593,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
if (device_param->skipped == true) continue;
|
if (device_param->skipped == true) continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// one-time init metal command-queue
|
||||||
* create command-queue
|
|
||||||
*/
|
|
||||||
|
|
||||||
mtl_command_queue command_queue;
|
if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
|
||||||
|
|
||||||
if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1)
|
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -8678,11 +8644,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||||
|
|
||||||
if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
|
if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
|
||||||
if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
|
if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
|
||||||
|
|
||||||
if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
|
if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
|
||||||
if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
|
if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
|
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
|
||||||
@ -8707,8 +8673,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
hcfree (tmp_device);
|
hcfree (tmp_device);
|
||||||
}
|
}
|
||||||
|
|
||||||
hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue);
|
|
||||||
|
|
||||||
if (device_param->device_host_unified_memory == 1)
|
if (device_param->device_host_unified_memory == 1)
|
||||||
{
|
{
|
||||||
// so, we actually have only half the memory because we need the same buffers on host side
|
// so, we actually have only half the memory because we need the same buffers on host side
|
||||||
@ -8734,11 +8698,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
if (device_param->skipped == true) continue;
|
if (device_param->skipped == true) continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// one-time init opencl context
|
||||||
* create context for each device
|
|
||||||
*/
|
|
||||||
|
|
||||||
cl_context context;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
cl_context_properties properties[3];
|
cl_context_properties properties[3];
|
||||||
@ -8747,10 +8707,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
properties[1] = (cl_context_properties) device_param->opencl_platform;
|
properties[1] = (cl_context_properties) device_param->opencl_platform;
|
||||||
properties[2] = 0;
|
properties[2] = 0;
|
||||||
|
|
||||||
CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
|
CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1)
|
if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -8760,13 +8720,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// one-time init open command-queue
|
||||||
* create command-queue
|
|
||||||
*/
|
|
||||||
|
|
||||||
cl_command_queue command_queue;
|
if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
|
||||||
|
|
||||||
if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1)
|
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -8781,17 +8737,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
|
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
|
||||||
{
|
{
|
||||||
#define RUN_INSTRUCTION_CHECKS() \
|
#define RUN_INSTRUCTION_CHECKS() \
|
||||||
device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vadd = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vsub = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
device_param->has_vperm = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
|
||||||
|
|
||||||
if (backend_devices_idx > 0)
|
if (backend_devices_idx > 0)
|
||||||
{
|
{
|
||||||
@ -8979,7 +8935,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
|
OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
|
||||||
|
|
||||||
tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
|
tmp_device[c] = ocl->clCreateBuffer (device_param->opencl_context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
|
||||||
|
|
||||||
if (CL_err != CL_SUCCESS)
|
if (CL_err != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
@ -8992,11 +8948,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
u8 tmp_host[8];
|
u8 tmp_host[8];
|
||||||
|
|
||||||
if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
if (ocl->clEnqueueReadBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
||||||
if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
||||||
|
|
||||||
if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
if (ocl->clEnqueueReadBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
||||||
if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
|
device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
|
||||||
@ -9008,23 +8964,27 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
// clean up
|
// clean up
|
||||||
|
|
||||||
|
int r = 0;
|
||||||
|
|
||||||
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
|
for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
|
||||||
{
|
{
|
||||||
if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
|
if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
|
||||||
|
|
||||||
if (tmp_device[c] != NULL)
|
if (tmp_device[c] != NULL)
|
||||||
{
|
{
|
||||||
if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
|
if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) r = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
hcfree (tmp_device);
|
hcfree (tmp_device);
|
||||||
|
|
||||||
|
if (r == -1)
|
||||||
|
{
|
||||||
|
// return -1 here is blocking, to be better evaluated
|
||||||
|
//return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
|
|
||||||
|
|
||||||
hc_clReleaseContext (hashcat_ctx, context);
|
|
||||||
|
|
||||||
if (device_param->device_host_unified_memory == 1)
|
if (device_param->device_host_unified_memory == 1)
|
||||||
{
|
{
|
||||||
@ -9065,23 +9025,61 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]);
|
hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// one-time release context/command-queue from all runtimes
|
||||||
|
|
||||||
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
|
for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
|
||||||
{
|
{
|
||||||
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
|
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
|
||||||
|
|
||||||
hcfree (device_param->device_name);
|
hcfree (device_param->device_name);
|
||||||
|
|
||||||
|
if (device_param->is_cuda == true)
|
||||||
|
{
|
||||||
|
if (device_param->cuda_context)
|
||||||
|
{
|
||||||
|
hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
|
||||||
|
|
||||||
|
device_param->cuda_context = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (device_param->is_hip == true)
|
||||||
|
{
|
||||||
|
hcfree (device_param->gcnArchName);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined (__APPLE__)
|
||||||
|
if (device_param->is_metal == true)
|
||||||
|
{
|
||||||
|
if (device_param->metal_command_queue)
|
||||||
|
{
|
||||||
|
hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
|
||||||
|
|
||||||
|
device_param->metal_command_queue = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (device_param->is_opencl == true)
|
if (device_param->is_opencl == true)
|
||||||
{
|
{
|
||||||
hcfree (device_param->opencl_driver_version);
|
hcfree (device_param->opencl_driver_version);
|
||||||
hcfree (device_param->opencl_device_version);
|
hcfree (device_param->opencl_device_version);
|
||||||
hcfree (device_param->opencl_device_c_version);
|
hcfree (device_param->opencl_device_c_version);
|
||||||
hcfree (device_param->opencl_device_vendor);
|
hcfree (device_param->opencl_device_vendor);
|
||||||
|
|
||||||
|
if (device_param->opencl_command_queue)
|
||||||
|
{
|
||||||
|
hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
|
||||||
|
|
||||||
|
device_param->opencl_command_queue = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->opencl_context)
|
||||||
{
|
{
|
||||||
hcfree (device_param->gcnArchName);
|
hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
|
||||||
|
|
||||||
|
device_param->opencl_context = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10730,93 +10728,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
// re-using context/command-queue, there is no need to re-initialize them
|
||||||
* create context for each device
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (device_param->is_cuda == true)
|
|
||||||
{
|
|
||||||
if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined (__APPLE__)
|
|
||||||
if (device_param->is_metal == true)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* create command-queue
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (device_param->is_opencl == true)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
cl_context_properties properties[3];
|
|
||||||
|
|
||||||
properties[0] = CL_CONTEXT_PLATFORM;
|
|
||||||
properties[1] = (cl_context_properties) device_param->opencl_platform;
|
|
||||||
properties[2] = 0;
|
|
||||||
|
|
||||||
CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create command-queue
|
|
||||||
*/
|
|
||||||
|
|
||||||
// not supported with NV
|
|
||||||
// device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
|
|
||||||
|
|
||||||
if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create stream for CUDA devices
|
* create stream for CUDA devices
|
||||||
@ -10824,6 +10736,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
if (device_param->is_cuda == true)
|
if (device_param->is_cuda == true)
|
||||||
{
|
{
|
||||||
|
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1)
|
if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
@ -10838,7 +10757,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hc_hipStreamCreateWithFlags (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -10880,21 +10806,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
|
if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
|
if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
|
if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
|
||||||
{
|
{
|
||||||
device_param->skipped = true;
|
device_param->skipped = true;
|
||||||
|
|
||||||
@ -16927,16 +16853,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1)
|
|
||||||
{
|
|
||||||
device_param->skipped = true;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
hardware_power_all += hardware_power_max;
|
hardware_power_all += hardware_power_max;
|
||||||
|
|
||||||
EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
|
EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
|
||||||
@ -17058,7 +16974,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
if (device_param->cuda_module_amp) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp);
|
if (device_param->cuda_module_amp) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp);
|
||||||
if (device_param->cuda_module_shared) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_shared);
|
if (device_param->cuda_module_shared) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_shared);
|
||||||
|
|
||||||
if (device_param->cuda_context) hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
|
//if (device_param->cuda_context) hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
|
||||||
|
|
||||||
device_param->cuda_d_pws_buf = 0;
|
device_param->cuda_d_pws_buf = 0;
|
||||||
device_param->cuda_d_pws_amp_buf = 0;
|
device_param->cuda_d_pws_amp_buf = 0;
|
||||||
@ -17135,7 +17051,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->cuda_module_amp = NULL;
|
device_param->cuda_module_amp = NULL;
|
||||||
device_param->cuda_module_shared = NULL;
|
device_param->cuda_module_shared = NULL;
|
||||||
|
|
||||||
device_param->cuda_context = NULL;
|
//device_param->cuda_context = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
@ -17189,8 +17105,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
if (device_param->hip_module_amp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp);
|
if (device_param->hip_module_amp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp);
|
||||||
if (device_param->hip_module_shared) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared);
|
if (device_param->hip_module_shared) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared);
|
||||||
|
|
||||||
if (device_param->hip_context) hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context);
|
|
||||||
|
|
||||||
device_param->hip_d_pws_buf = 0;
|
device_param->hip_d_pws_buf = 0;
|
||||||
device_param->hip_d_pws_amp_buf = 0;
|
device_param->hip_d_pws_amp_buf = 0;
|
||||||
device_param->hip_d_pws_comp_buf = 0;
|
device_param->hip_d_pws_comp_buf = 0;
|
||||||
@ -17265,8 +17179,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->hip_module_mp = NULL;
|
device_param->hip_module_mp = NULL;
|
||||||
device_param->hip_module_amp = NULL;
|
device_param->hip_module_amp = NULL;
|
||||||
device_param->hip_module_shared = NULL;
|
device_param->hip_module_shared = NULL;
|
||||||
|
|
||||||
device_param->hip_context = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined (__APPLE__)
|
#if defined (__APPLE__)
|
||||||
@ -17341,7 +17253,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
if (device_param->metal_library_amp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp);
|
if (device_param->metal_library_amp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp);
|
||||||
if (device_param->metal_library_shared) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared);
|
if (device_param->metal_library_shared) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared);
|
||||||
|
|
||||||
if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
|
//if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
|
||||||
|
|
||||||
//if (device_param->metal_device) hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device);
|
//if (device_param->metal_device) hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device);
|
||||||
|
|
||||||
@ -17411,7 +17323,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->metal_library_mp = NULL;
|
device_param->metal_library_mp = NULL;
|
||||||
device_param->metal_library_amp = NULL;
|
device_param->metal_library_amp = NULL;
|
||||||
device_param->metal_library_shared = NULL;
|
device_param->metal_library_shared = NULL;
|
||||||
device_param->metal_command_queue = NULL;
|
//device_param->metal_command_queue = NULL;
|
||||||
//device_param->metal_device = NULL;
|
//device_param->metal_device = NULL;
|
||||||
}
|
}
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
||||||
@ -17487,9 +17399,9 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
if (device_param->opencl_program_amp) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_amp);
|
if (device_param->opencl_program_amp) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_amp);
|
||||||
if (device_param->opencl_program_shared) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_shared);
|
if (device_param->opencl_program_shared) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_shared);
|
||||||
|
|
||||||
if (device_param->opencl_command_queue) hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
|
//if (device_param->opencl_command_queue) hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
|
||||||
|
|
||||||
if (device_param->opencl_context) hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
|
//if (device_param->opencl_context) hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
|
||||||
|
|
||||||
device_param->opencl_d_pws_buf = NULL;
|
device_param->opencl_d_pws_buf = NULL;
|
||||||
device_param->opencl_d_pws_amp_buf = NULL;
|
device_param->opencl_d_pws_amp_buf = NULL;
|
||||||
@ -17557,8 +17469,8 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->opencl_program_mp = NULL;
|
device_param->opencl_program_mp = NULL;
|
||||||
device_param->opencl_program_amp = NULL;
|
device_param->opencl_program_amp = NULL;
|
||||||
device_param->opencl_program_shared = NULL;
|
device_param->opencl_program_shared = NULL;
|
||||||
device_param->opencl_command_queue = NULL;
|
//device_param->opencl_command_queue = NULL;
|
||||||
device_param->opencl_context = NULL;
|
//device_param->opencl_context = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
device_param->h_tmps = NULL;
|
device_param->h_tmps = NULL;
|
||||||
|
@ -381,7 +381,7 @@ HC_API_CALL void *thread_calc_stdin (void *p)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (calc_stdin (hashcat_ctx, device_param) == -1)
|
if (calc_stdin (hashcat_ctx, device_param) == -1)
|
||||||
@ -396,11 +396,6 @@ HC_API_CALL void *thread_calc_stdin (void *p)
|
|||||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bridge_ctx->enabled == true)
|
if (bridge_ctx->enabled == true)
|
||||||
{
|
{
|
||||||
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
|
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
|
||||||
@ -1685,7 +1680,7 @@ HC_API_CALL void *thread_calc (void *p)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (calc (hashcat_ctx, device_param) == -1)
|
if (calc (hashcat_ctx, device_param) == -1)
|
||||||
@ -1700,11 +1695,6 @@ HC_API_CALL void *thread_calc (void *p)
|
|||||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_param->is_hip == true)
|
|
||||||
{
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bridge_ctx->enabled == true)
|
if (bridge_ctx->enabled == true)
|
||||||
{
|
{
|
||||||
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
|
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
|
||||||
|
122
src/ext_hip.c
122
src/ext_hip.c
@ -127,7 +127,8 @@ int hip_init (void *hashcat_ctx)
|
|||||||
HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreate, HIP_HIPEVENTCREATE, HIP, 1);
|
||||||
|
HC_LOAD_FUNC_HIP (hip, hipEventCreateWithFlags, hipEventCreateWithFlags, HIP_HIPEVENTCREATEWITHFLAGS, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
|
||||||
@ -155,7 +156,10 @@ int hip_init (void *hashcat_ctx)
|
|||||||
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion, hipRuntimeGetVersion, HIP_HIPRUNTIMEGETVERSION, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion, hipRuntimeGetVersion, HIP_HIPRUNTIMEGETVERSION, HIP, 1);
|
||||||
|
HC_LOAD_FUNC_HIP (hip, hipSetDevice, hipSetDevice, HIP_HIPSETDEVICE, HIP, 1);
|
||||||
|
HC_LOAD_FUNC_HIP (hip, hipSetDeviceFlags, hipSetDeviceFlags, HIP_HIPSETDEVICEFLAGS, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
|
||||||
|
HC_LOAD_FUNC_HIP (hip, hipStreamCreateWithFlags, hipStreamCreateWithFlags, HIP_HIPSTREAMCREATEWITHFLAGS, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
|
||||||
HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
|
HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
|
||||||
@ -507,13 +511,13 @@ int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags)
|
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent)
|
||||||
{
|
{
|
||||||
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags);
|
const hipError_t HIP_err = hip->hipEventCreate (phEvent);
|
||||||
|
|
||||||
if (HIP_err != hipSuccess)
|
if (HIP_err != hipSuccess)
|
||||||
{
|
{
|
||||||
@ -534,6 +538,33 @@ int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flag
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int flags)
|
||||||
|
{
|
||||||
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
|
const hipError_t HIP_err = hip->hipEventCreateWithFlags (phEvent, flags);
|
||||||
|
|
||||||
|
if (HIP_err != hipSuccess)
|
||||||
|
{
|
||||||
|
const char *pStr = NULL;
|
||||||
|
|
||||||
|
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %s", pStr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %d", HIP_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent)
|
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent)
|
||||||
{
|
{
|
||||||
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
@ -1211,13 +1242,67 @@ int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
|
int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev)
|
||||||
{
|
{
|
||||||
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags);
|
const hipError_t HIP_err = hip->hipSetDevice (dev);
|
||||||
|
|
||||||
|
if (HIP_err != hipSuccess)
|
||||||
|
{
|
||||||
|
const char *pStr = NULL;
|
||||||
|
|
||||||
|
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipSetDevice(): %s", pStr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipSetDevice(): %d", HIP_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags)
|
||||||
|
{
|
||||||
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
|
const hipError_t HIP_err = hip->hipSetDeviceFlags (flags);
|
||||||
|
|
||||||
|
if (HIP_err != hipSuccess)
|
||||||
|
{
|
||||||
|
const char *pStr = NULL;
|
||||||
|
|
||||||
|
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %s", pStr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %d", HIP_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream)
|
||||||
|
{
|
||||||
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
|
const hipError_t HIP_err = hip->hipStreamCreate (phStream);
|
||||||
|
|
||||||
if (HIP_err != hipSuccess)
|
if (HIP_err != hipSuccess)
|
||||||
{
|
{
|
||||||
@ -1238,6 +1323,33 @@ int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int F
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
|
||||||
|
{
|
||||||
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
|
||||||
|
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
|
||||||
|
|
||||||
|
const hipError_t HIP_err = hip->hipStreamCreateWithFlags (phStream, Flags);
|
||||||
|
|
||||||
|
if (HIP_err != hipSuccess)
|
||||||
|
{
|
||||||
|
const char *pStr = NULL;
|
||||||
|
|
||||||
|
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %s", pStr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %d", HIP_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream)
|
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream)
|
||||||
{
|
{
|
||||||
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
|
||||||
|
@ -1272,7 +1272,7 @@ HC_API_CALL void *thread_selftest (void *p)
|
|||||||
|
|
||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int rc_selftest = process_selftest (hashcat_ctx, device_param);
|
const int rc_selftest = process_selftest (hashcat_ctx, device_param);
|
||||||
@ -1303,8 +1303,6 @@ HC_API_CALL void *thread_selftest (void *p)
|
|||||||
if (device_param->is_hip == true)
|
if (device_param->is_hip == true)
|
||||||
{
|
{
|
||||||
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return NULL;
|
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return NULL;
|
||||||
|
|
||||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bridge_ctx->enabled == true)
|
if (bridge_ctx->enabled == true)
|
||||||
|
Loading…
Reference in New Issue
Block a user