mirror of https://github.com/hashcat/hashcat.git synced 2025-07-21 14:08:21 +00:00

Added workaround to get rid of internal runtimes memory leaks

From now on, especially in benchmark mode, hashcat no longer creates and destroys a context and a command-queue for each enabled device every time it switches from one hash-mode to the next.
Specifically, when using OpenCL with an NVIDIA device it was not possible to complete the benchmark, because clCreateContext leaks memory and slowly consumes all available GPU memory until hashcat can no longer create a new context and has to disable the device.
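A minimal sketch of the idea (illustrative only, not the actual hashcat backend code; the persistent_device_t struct and helper names are made up for this example): the context and command-queue are created once per device and reused for every hash-mode, so the leaking clCreateContext call is issued only once per device.

#include <CL/cl.h>

typedef struct
{
  cl_device_id     device;
  cl_context       context;       // created once, reused for every hash-mode
  cl_command_queue command_queue; // created once, reused for every hash-mode
} persistent_device_t;

static int persistent_device_init (persistent_device_t *pd, cl_device_id device)
{
  cl_int err;

  pd->device  = device;
  pd->context = clCreateContext (NULL, 1, &device, NULL, NULL, &err);

  if (err != CL_SUCCESS) return -1;

  pd->command_queue = clCreateCommandQueueWithProperties (pd->context, device, NULL, &err);

  if (err != CL_SUCCESS)
  {
    clReleaseContext (pd->context);

    return -1;
  }

  return 0;
}

static void persistent_device_term (persistent_device_t *pd)
{
  // released only once, after the last hash-mode has been benchmarked

  clReleaseCommandQueue (pd->command_queue);
  clReleaseContext      (pd->context);
}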

Avoid deprecated HIP functions

All hipCtx* functions have been declared deprecated, so we replaced them with their newer equivalents, also fixing a critical bug in handling multiple AMD devices in the same system.
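A rough sketch of the substitution (assuming the usual HIP migration path, not the exact hashcat code): the explicit-context calls are replaced by their device-based counterparts, hipSetDevice and hipDeviceSynchronize, which also keeps the selected device unambiguous when several AMD GPUs are installed in the same system.

#include <hip/hip_runtime_api.h>

static hipError_t select_and_sync_device (const int device_id)
{
  // before: hipCtxCreate (&ctx, 0, dev); hipCtxSetCurrent (ctx);
  // after:  bind the calling thread to the device; the primary context is managed implicitly

  const hipError_t rc = hipSetDevice (device_id);

  if (rc != hipSuccess) return rc;

  // before: hipCtxSynchronize ();
  // after:

  return hipDeviceSynchronize ();
}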
This commit is contained in:
Gabriele Gristina 2025-07-06 21:28:37 +02:00
parent 0576c41491
commit f663abee44
No known key found for this signature in database
GPG Key ID: 9F68B59298F311F0
7 changed files with 793 additions and 790 deletions

View File

@@ -129,6 +129,7 @@
 - Alias Devices: Prevents hashcat, when started with x86_64 emulation on Apple Silicon, from showing the Apple M1 OpenCL CPU as an alias for the Apple M1 Metal GPU
 - Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
 - Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
+- Backend: Added workaround to get rid of internal runtimes memory leaks
 - Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
 - Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
 - Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
@@ -141,6 +142,7 @@
 - Building: Support building windows binaries on macOS using MinGW
 - Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
 - Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
+- HIP Backend: Avoid deprecated functions
 - Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
 - Modules: Updated module_unstable_warning
 - Open Document Format: Added support for small documents with content length < 1024

View File

@@ -12,20 +12,22 @@
// start: driver_types.h
typedef void *hipDeviceptr_t;
typedef enum hipFunction_attribute
{
  HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, // The maximum number of threads per block. Depends on function and device.
  HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, // The statically allocated shared memory size in bytes per block required by the function.
  HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, // The user-allocated constant memory by the function in bytes.
  HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, // The local memory usage of each thread by this function in bytes.
  HIP_FUNC_ATTRIBUTE_NUM_REGS, // The number of registers used by each thread of this function.
  HIP_FUNC_ATTRIBUTE_PTX_VERSION, // PTX version
  HIP_FUNC_ATTRIBUTE_BINARY_VERSION, // Binary version
  HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, // Cache mode
  HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, // The maximum dynamic shared memory per block for this function in bytes.
  HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, // The shared memory carveout preference in percent of the maximum shared memory.
  HIP_FUNC_ATTRIBUTE_MAX
} hipFunction_attribute;
// stop: driver_types.h
@@ -47,13 +49,14 @@ typedef struct ihipModuleSymbol_t* hipFunction_t;
#define __HIP_NODISCARD
#endif
typedef enum __HIP_NODISCARD hipError_t
{
  hipSuccess = 0, // Successful completion.
  hipErrorInvalidValue = 1, // One or more of the parameters passed to the API call is NULL
                            // or not in an acceptable range.
  hipErrorOutOfMemory = 2,
  // Deprecated
  hipErrorMemoryAllocation = 2, // Memory allocation error.
  hipErrorNotInitialized = 3,
  // Deprecated
  hipErrorInitializationError = 3,
@@ -65,20 +68,20 @@ typedef enum __HIP_NODISCARD hipError_t {
  hipErrorInvalidConfiguration = 9,
  hipErrorInvalidPitchValue = 12,
  hipErrorInvalidSymbol = 13,
  hipErrorInvalidDevicePointer = 17, // Invalid Device Pointer
  hipErrorInvalidMemcpyDirection = 21, // Invalid memory copy direction
  hipErrorInsufficientDriver = 35,
  hipErrorMissingConfiguration = 52,
  hipErrorPriorLaunchFailure = 53,
  hipErrorInvalidDeviceFunction = 98,
  hipErrorNoDevice = 100, // Call to hipGetDeviceCount returned 0 devices
  hipErrorInvalidDevice = 101, // DeviceID must be in range 0...#compute-devices.
  hipErrorInvalidImage = 200,
  hipErrorInvalidContext = 201, // Produced when input context is invalid.
  hipErrorContextAlreadyCurrent = 202,
  hipErrorMapFailed = 205,
  // Deprecated
  hipErrorMapBufferObjectFailed = 205, // Produced when the IPC memory attach failed from ROCr.
  hipErrorUnmapFailed = 206,
  hipErrorArrayIsMapped = 207,
  hipErrorAlreadyMapped = 208,
@@ -91,7 +94,7 @@ typedef enum __HIP_NODISCARD hipError_t {
  hipErrorUnsupportedLimit = 215,
  hipErrorContextAlreadyInUse = 216,
  hipErrorPeerAccessUnsupported = 217,
  hipErrorInvalidKernelFile = 218, // In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
  hipErrorInvalidGraphicsContext = 219,
  hipErrorInvalidSource = 300,
  hipErrorFileNotFound = 301,
@@ -100,67 +103,48 @@ typedef enum __HIP_NODISCARD hipError_t {
  hipErrorOperatingSystem = 304,
  hipErrorInvalidHandle = 400,
  // Deprecated
  hipErrorInvalidResourceHandle = 400, // Resource handle (hipEvent_t or hipStream_t) invalid.
  hipErrorIllegalState = 401, // Resource required is not in a valid state to perform operation.
  hipErrorNotFound = 500,
  hipErrorNotReady = 600, // Indicates that asynchronous operations enqueued earlier are not ready.
                          // This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion).
                          // APIs that return this error include hipEventQuery and hipStreamQuery.
  hipErrorIllegalAddress = 700,
  hipErrorLaunchOutOfResources = 701, // Out of resources error.
  hipErrorLaunchTimeOut = 702,
  hipErrorPeerAccessAlreadyEnabled = 704, // Peer access was already enabled from the current device.
  hipErrorPeerAccessNotEnabled = 705, // Peer access was never enabled from the current device.
  hipErrorSetOnActiveProcess = 708,
  hipErrorContextIsDestroyed = 709,
  hipErrorAssert = 710, // Produced when the kernel calls assert.
  hipErrorHostMemoryAlreadyRegistered = 712, // Produced when trying to lock a page-locked memory.
  hipErrorHostMemoryNotRegistered = 713, // Produced when trying to unlock a non-page-locked memory.
  hipErrorLaunchFailure = 719, // An exception occurred on the device while executing a kernel.
  hipErrorCooperativeLaunchTooLarge = 720, // This error indicates that the number of blocks launched per grid for a kernel
                                           // that was launched via cooperative launch APIs exceeds the maximum number of
                                           // allowed blocks for the current device
  hipErrorNotSupported = 801, // Produced when the hip API is not supported/implemented
  hipErrorStreamCaptureUnsupported = 900, // The operation is not permitted when the stream is capturing.
  hipErrorStreamCaptureInvalidated = 901, // The current capture sequence on the stream
                                          // has been invalidated due to a previous error.
  hipErrorStreamCaptureMerge = 902, // The operation would have resulted in a merge of two independent capture sequences.
  hipErrorStreamCaptureUnmatched = 903, // The capture was not initiated in this stream.
  hipErrorStreamCaptureUnjoined = 904, // The capture sequence contains a fork that was not joined to the primary stream.
  hipErrorStreamCaptureIsolation = 905, // A dependency would have been created which crosses the capture sequence boundary.
                                        // Only implicit in-stream ordering dependencies are allowed to cross the boundary
  hipErrorStreamCaptureImplicit = 906, // The operation would have resulted in a disallowed implicit dependency on a current
                                       // capture sequence from hipStreamLegacy.
  hipErrorCapturedEvent = 907, // The operation is not permitted on an event which was last recorded in a capturing stream.
  hipErrorStreamCaptureWrongThread = 908, // A stream capture sequence not initiated with the hipStreamCaptureModeRelaxed argument to
                                          // hipStreamBeginCapture was passed to hipStreamEndCapture in a different thread.
  hipErrorGraphExecUpdateFailure = 910, // This error indicates that the graph update not performed because it included changes which
                                        // violated constraints specific to instantiated graph update.
  hipErrorUnknown = 999, // Unknown error.
  // HSA Runtime Error Codes start here.
  hipErrorRuntimeMemory = 1052, // HSA runtime memory call returned error. Typically not seen in production systems.
  hipErrorRuntimeOther = 1053, // HSA runtime call other than memory returned error. Typically not seen in production systems.
  hipErrorTbd // Marker that more error codes are needed.
} hipError_t;
#undef __HIP_NODISCARD
@@ -170,178 +154,178 @@ typedef enum __HIP_NODISCARD hipError_t {
 * hipDeviceAttribute_t
 * hipDeviceAttributeUnused number: 5
 */
typedef enum hipDeviceAttribute_t
{
  hipDeviceAttributeCudaCompatibleBegin = 0,
  hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, // Whether ECC support is enabled.
  hipDeviceAttributeAccessPolicyMaxWindowSize, // Cuda only. The maximum size of the window policy in bytes.
  hipDeviceAttributeAsyncEngineCount, // Asynchronous engines number.
  hipDeviceAttributeCanMapHostMemory, // Whether host memory can be mapped into device address space
  hipDeviceAttributeCanUseHostPointerForRegisteredMem, // Device can access host registered memory
                                                       // at the same virtual address as the CPU
  hipDeviceAttributeClockRate, // Peak clock frequency in kilohertz.
  hipDeviceAttributeComputeMode, // Compute mode that device is currently in.
  hipDeviceAttributeComputePreemptionSupported, // Device supports Compute Preemption.
  hipDeviceAttributeConcurrentKernels, // Device can possibly execute multiple kernels concurrently.
  hipDeviceAttributeConcurrentManagedAccess, // Device can coherently access managed memory concurrently with the CPU
  hipDeviceAttributeCooperativeLaunch, // Support cooperative launch
  hipDeviceAttributeCooperativeMultiDeviceLaunch, // Support cooperative launch on multiple devices
  hipDeviceAttributeDeviceOverlap, // Device can concurrently copy memory and execute a kernel.
                                   // Deprecated. Use instead asyncEngineCount.
  hipDeviceAttributeDirectManagedMemAccessFromHost, // Host can directly access managed memory on
                                                    // the device without migration
  hipDeviceAttributeGlobalL1CacheSupported, // Device supports caching globals in L1
  hipDeviceAttributeHostNativeAtomicSupported, // Link between the device and the host supports native atomic operations
  hipDeviceAttributeIntegrated, // Device is integrated GPU
  hipDeviceAttributeIsMultiGpuBoard, // Multiple GPU devices.
  hipDeviceAttributeKernelExecTimeout, // Run time limit for kernels executed on the device
  hipDeviceAttributeL2CacheSize, // Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
  hipDeviceAttributeLocalL1CacheSupported, // caching locals in L1 is supported
  hipDeviceAttributeLuid, // 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeLuidDeviceNodeMask, // Luid device node mask. Undefined on TCC and non-Windows platforms
  hipDeviceAttributeComputeCapabilityMajor, // Major compute capability version number.
  hipDeviceAttributeManagedMemory, // Device supports allocating managed memory on this system
  hipDeviceAttributeMaxBlocksPerMultiProcessor, // Max block size per multiprocessor
  hipDeviceAttributeMaxBlockDimX, // Max block size in width.
  hipDeviceAttributeMaxBlockDimY, // Max block size in height.
  hipDeviceAttributeMaxBlockDimZ, // Max block size in depth.
  hipDeviceAttributeMaxGridDimX, // Max grid size in width.
  hipDeviceAttributeMaxGridDimY, // Max grid size in height.
  hipDeviceAttributeMaxGridDimZ, // Max grid size in depth.
  hipDeviceAttributeMaxSurface1D, // Maximum size of 1D surface.
  hipDeviceAttributeMaxSurface1DLayered, // Cuda only. Maximum dimensions of 1D layered surface.
  hipDeviceAttributeMaxSurface2D, // Maximum dimension (width, height) of 2D surface.
  hipDeviceAttributeMaxSurface2DLayered, // Cuda only. Maximum dimensions of 2D layered surface.
  hipDeviceAttributeMaxSurface3D, // Maximum dimension (width, height, depth) of 3D surface.
  hipDeviceAttributeMaxSurfaceCubemap, // Cuda only. Maximum dimensions of Cubemap surface.
  hipDeviceAttributeMaxSurfaceCubemapLayered, // Cuda only. Maximum dimension of Cubemap layered surface.
  hipDeviceAttributeMaxTexture1DWidth, // Maximum size of 1D texture.
  hipDeviceAttributeMaxTexture1DLayered, // Maximum dimensions of 1D layered texture.
  hipDeviceAttributeMaxTexture1DLinear, // Maximum number of elements allocatable in a 1D linear texture.
                                        // Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
  hipDeviceAttributeMaxTexture1DMipmap, // Maximum size of 1D mipmapped texture.
  hipDeviceAttributeMaxTexture2DWidth, // Maximum dimension width of 2D texture.
  hipDeviceAttributeMaxTexture2DHeight, // Maximum dimension hight of 2D texture.
  hipDeviceAttributeMaxTexture2DGather, // Maximum dimensions of 2D texture if gather operations performed.
  hipDeviceAttributeMaxTexture2DLayered, // Maximum dimensions of 2D layered texture.
  hipDeviceAttributeMaxTexture2DLinear, // Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
  hipDeviceAttributeMaxTexture2DMipmap, // Maximum dimensions of 2D mipmapped texture.
  hipDeviceAttributeMaxTexture3DWidth, // Maximum dimension width of 3D texture.
  hipDeviceAttributeMaxTexture3DHeight, // Maximum dimension height of 3D texture.
  hipDeviceAttributeMaxTexture3DDepth, // Maximum dimension depth of 3D texture.
  hipDeviceAttributeMaxTexture3DAlt, // Maximum dimensions of alternate 3D texture.
  hipDeviceAttributeMaxTextureCubemap, // Maximum dimensions of Cubemap texture
  hipDeviceAttributeMaxTextureCubemapLayered, // Maximum dimensions of Cubemap layered texture.
  hipDeviceAttributeMaxThreadsDim, // Maximum dimension of a block
  hipDeviceAttributeMaxThreadsPerBlock, // Maximum number of threads per block.
  hipDeviceAttributeMaxThreadsPerMultiProcessor, // Maximum resident threads per multiprocessor.
  hipDeviceAttributeMaxPitch, // Maximum pitch in bytes allowed by memory copies
  hipDeviceAttributeMemoryBusWidth, // Global memory bus width in bits.
  hipDeviceAttributeMemoryClockRate, // Peak memory clock frequency in kilohertz.
  hipDeviceAttributeComputeCapabilityMinor, // Minor compute capability version number.
  hipDeviceAttributeMultiGpuBoardGroupID, // Unique ID of device group on the same multi-GPU board
  hipDeviceAttributeMultiprocessorCount, // Number of multiprocessors on the device.
  hipDeviceAttributeUnused1, // Previously hipDeviceAttributeName
  hipDeviceAttributePageableMemoryAccess, // Device supports coherently accessing pageable memory
                                          // without calling hipHostRegister on it
  hipDeviceAttributePageableMemoryAccessUsesHostPageTables, // Device accesses pageable memory via the host's page tables
  hipDeviceAttributePciBusId, // PCI Bus ID.
  hipDeviceAttributePciDeviceId, // PCI Device ID.
  hipDeviceAttributePciDomainID, // PCI Domain ID.
  hipDeviceAttributePersistingL2CacheMaxSize, // Maximum l2 persisting lines capacity in bytes
  hipDeviceAttributeMaxRegistersPerBlock, // 32-bit registers available to a thread block. This number is shared
                                          // by all thread blocks simultaneously resident on a multiprocessor.
  hipDeviceAttributeMaxRegistersPerMultiprocessor, // 32-bit registers available per block.
  hipDeviceAttributeReservedSharedMemPerBlock, // Shared memory reserved by CUDA driver per block.
  hipDeviceAttributeMaxSharedMemoryPerBlock, // Maximum shared memory available per block in bytes.
  hipDeviceAttributeSharedMemPerBlockOptin, // Maximum shared memory per block usable by special opt in.
  hipDeviceAttributeSharedMemPerMultiprocessor, // Shared memory available per multiprocessor.
  hipDeviceAttributeSingleToDoublePrecisionPerfRatio, // Cuda only. Performance ratio of single precision to double precision.
  hipDeviceAttributeStreamPrioritiesSupported, // Whether to support stream priorities.
  hipDeviceAttributeSurfaceAlignment, // Alignment requirement for surfaces
  hipDeviceAttributeTccDriver, // Cuda only. Whether device is a Tesla device using TCC driver
  hipDeviceAttributeTextureAlignment, // Alignment requirement for textures
  hipDeviceAttributeTexturePitchAlignment, // Pitch alignment requirement for 2D texture references bound to pitched memory;
  hipDeviceAttributeTotalConstantMemory, // Constant memory size in bytes.
  hipDeviceAttributeTotalGlobalMem, // Global memory available on devicice.
  hipDeviceAttributeUnifiedAddressing, // Cuda only. An unified address space shared with the host.
  hipDeviceAttributeUnused2, // Previously hipDeviceAttributeUuid
  hipDeviceAttributeWarpSize, // Warp size in threads.
  hipDeviceAttributeMemoryPoolsSupported, // Device supports HIP Stream Ordered Memory Allocator
  hipDeviceAttributeVirtualMemoryManagementSupported, // Device supports HIP virtual memory management
  hipDeviceAttributeHostRegisterSupported, // Can device support host memory registration via hipHostRegister
  hipDeviceAttributeMemoryPoolSupportedHandleTypes, // Supported handle mask for HIP Stream Ordered Memory Allocator
  hipDeviceAttributeCudaCompatibleEnd = 9999,
  hipDeviceAttributeAmdSpecificBegin = 10000,
  hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, // Frequency in khz of the timer used by the device-side "clock*"
  hipDeviceAttributeUnused3, // Previously hipDeviceAttributeArch
  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, // Maximum Shared Memory PerMultiprocessor.
  hipDeviceAttributeUnused4, // Previously hipDeviceAttributeGcnArch
  hipDeviceAttributeUnused5, // Previously hipDeviceAttributeGcnArchName
  hipDeviceAttributeHdpMemFlushCntl, // Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpRegFlushCntl, // Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, // Supports cooperative launch on multiple devices with unmatched functions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, // Supports cooperative launch on multiple devices with unmatched grid dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, // Supports cooperative launch on multiple devices with unmatched block dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, // Supports cooperative launch on multiple devices with unmatched shared memories
  hipDeviceAttributeIsLargeBar, // Whether it is LargeBar
  hipDeviceAttributeAsicRevision, // Revision of the GPU in this device
  hipDeviceAttributeCanUseStreamWaitValue, // '1' if Device supports hipStreamWaitValue32() and hipStreamWaitValue64(), '0' otherwise.
  hipDeviceAttributeImageSupport, // '1' if Device supports image, '0' otherwise.
  hipDeviceAttributePhysicalMultiProcessorCount, // All available physical compute units for the device
  hipDeviceAttributeFineGrainSupport, // '1' if Device supports fine grain, '0' otherwise
  hipDeviceAttributeWallClockRate, // Constant frequency of wall clock in kilohertz.
  hipDeviceAttributeAmdSpecificEnd = 19999,
  hipDeviceAttributeVendorSpecificBegin = 20000,
  // Extended attributes for vendors
} hipDeviceAttribute_t;
/**
 * hipDeviceArch_t
 *
 */
typedef struct
{
  // 32-bit Atomics
  unsigned hasGlobalInt32Atomics : 1; // 32-bit integer atomics for global memory.
  unsigned hasGlobalFloatAtomicExch : 1; // 32-bit float atomic exch for global memory.
  unsigned hasSharedInt32Atomics : 1; // 32-bit integer atomics for shared memory.
  unsigned hasSharedFloatAtomicExch : 1; // 32-bit float atomic exch for shared memory.
  unsigned hasFloatAtomicAdd : 1; // 32-bit float atomic add in global and shared memory.
  // 64-bit Atomics
  unsigned hasGlobalInt64Atomics : 1; // 64-bit integer atomics for global memory.
  unsigned hasSharedInt64Atomics : 1; // 64-bit integer atomics for shared memory.
  // Doubles
  unsigned hasDoubles : 1; // Double-precision floating point.
  // Warp cross-lane operations
  unsigned hasWarpVote : 1; // Warp vote instructions (__any, __all).
  unsigned hasWarpBallot : 1; // Warp ballot instructions (__ballot).
  unsigned hasWarpShuffle : 1; // Warp shuffle operations. (__shfl_*).
  unsigned hasFunnelShift : 1; // Funnel two words into one with shift&mask caps.
  // Sync
  unsigned hasThreadFenceSystem : 1; // __threadfence_system.
  unsigned hasSyncThreadsExt : 1; // __syncthreads_count, syncthreads_and, syncthreads_or.
  // Misc
  unsigned hasSurfaceFuncs : 1; // Surface functions.
  unsigned has3dGrid : 1; // Grid and group dims are 3D (rather than 2D).
  unsigned hasDynamicParallelism : 1; // Dynamic parallelism.
} hipDeviceArch_t;
typedef struct hipUUID_t
{
  char bytes[16];
} hipUUID;
@ -349,144 +333,122 @@ typedef struct hipUUID_t {
* hipDeviceProp * hipDeviceProp
* *
*/ */
typedef struct hipDeviceProp_t { typedef struct hipDeviceProp_t
char name[256]; ///< Device name. {
hipUUID uuid; ///< UUID of a device char name[256]; // Device name.
char luid[8]; ///< 8-byte unique identifier. Only valid on windows hipUUID uuid; // UUID of a device
unsigned int luidDeviceNodeMask; ///< LUID node mask char luid[8]; // 8-byte unique identifier. Only valid on windows
size_t totalGlobalMem; ///< Size of global memory region (in bytes). unsigned int luidDeviceNodeMask; // LUID node mask
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes). size_t totalGlobalMem; // Size of global memory region (in bytes).
int regsPerBlock; ///< Registers per block. size_t sharedMemPerBlock; // Size of shared memory per block (in bytes).
int warpSize; ///< Warp size. int regsPerBlock; // Registers per block.
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies int warpSize; // Warp size.
///< pitched memory size_t memPitch; // Maximum pitch in bytes allowed by memory copies pitched memory
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. int maxThreadsPerBlock; // Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. int maxThreadsDim[3]; // Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ). int maxGridSize[3]; // Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz. int clockRate; // Max clock frequency of the multiProcessors in khz.
size_t totalConstMem; ///< Size of shared constant memory region on the device size_t totalConstMem; // Size of shared constant memory region on the device (in bytes).
///< (in bytes). int major; // Major compute capability. On HCC, this is an approximation and features may
int major; ///< Major compute capability. On HCC, this is an approximation and features may // differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
///< differ from CUDA CC. See the arch feature flags for portable ways to query int minor; // Minor compute capability. On HCC, this is an approximation and features may
///< feature caps. // differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may size_t textureAlignment; // Alignment requirement for textures
///< differ from CUDA CC. See the arch feature flags for portable ways to query size_t texturePitchAlignment; // Pitch alignment requirement for texture references bound to
///< feature caps. int deviceOverlap; // Deprecated. Use asyncEngineCount instead
size_t textureAlignment; ///< Alignment requirement for textures int multiProcessorCount; // Number of multi-processors (compute units).
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to int kernelExecTimeoutEnabled; // Run time limit for kernels executed on the device
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead int integrated; // APU vs dGPU
int multiProcessorCount; ///< Number of multi-processors (compute units). int canMapHostMemory; // Check whether HIP can map host memory
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device int computeMode; // Compute mode.
int integrated; ///< APU vs dGPU int maxTexture1D; // Maximum number of elements in 1D images
int canMapHostMemory; ///< Check whether HIP can map host memory int maxTexture1DMipmap; // Maximum 1D mipmap texture size
int computeMode; ///< Compute mode. int maxTexture1DLinear; // Maximum size for 1D textures bound to linear memory
int maxTexture1D; ///< Maximum number of elements in 1D images int maxTexture2D[2]; // Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size int maxTexture2DMipmap[2]; // Maximum number of elements in 2D array mipmap of images
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory int maxTexture2DLinear[3]; // Maximum 2D tex dimensions if tex are bound to pitched memory
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements int maxTexture2DGather[2]; // Maximum 2D tex dimensions if gather has to be performed
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images int maxTexture3D[3]; // Maximum dimensions (width, height, depth) of 3D images, in image elements
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory int maxTexture3DAlt[3]; // Maximum alternate 3D texture dims
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed int maxTextureCubemap; // Maximum cubemap texture dims
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image int maxTexture1DLayered[2]; // Maximum number of elements in 1D array images
///< elements int maxTexture2DLayered[3]; // Maximum number of elements in 2D array images
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims int maxTextureCubemapLayered[2]; // Maximum cubemaps layered texture dims
int maxTextureCubemap; ///< Maximum cubemap texture dims int maxSurface1D; // Maximum 1D surface size
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images int maxSurface2D[2]; // Maximum 2D surface size
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images int maxSurface3D[3]; // Maximum 3D surface size
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims int maxSurface1DLayered[2]; // Maximum 1D layered surface size
int maxSurface1D; ///< Maximum 1D surface size int maxSurface2DLayered[3]; // Maximum 2D layared surface size
int maxSurface2D[2]; ///< Maximum 2D surface size int maxSurfaceCubemap; // Maximum cubemap surface size
int maxSurface3D[3]; ///< Maximum 3D surface size int maxSurfaceCubemapLayered[2]; // Maximum cubemap layered surface size
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size size_t surfaceAlignment; // Alignment requirement for surface
int maxSurface2DLayered[3]; ///< Maximum 2D layared surface size int concurrentKernels; // Device can possibly execute multiple kernels concurrently.
int maxSurfaceCubemap; ///< Maximum cubemap surface size int ECCEnabled; // Device has ECC support enabled
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size int pciBusID; // PCI Bus ID.
size_t surfaceAlignment; ///< Alignment requirement for surface int pciDeviceID; // PCI Device ID.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. int pciDomainID; // PCI Domain ID
int ECCEnabled; ///< Device has ECC support enabled int tccDriver; // 1:If device is Tesla device using TCC driver, else 0
int pciBusID; ///< PCI Bus ID. int asyncEngineCount; // Number of async engines
int pciDeviceID; ///< PCI Device ID. int unifiedAddressing; // Does device and host share unified address space
int pciDomainID; ///< PCI Domain ID int memoryClockRate; // Max global memory clock frequency in khz.
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 int memoryBusWidth; // Global memory bus width in bits.
int asyncEngineCount; ///< Number of async engines int l2CacheSize; // L2 cache size.
int unifiedAddressing; ///< Does device and host share unified address space int persistingL2CacheMaxSize; // Device's max L2 persisting lines in bytes
int memoryClockRate; ///< Max global memory clock frequency in khz. int maxThreadsPerMultiProcessor; // Maximum resident threads per multi-processor.
int memoryBusWidth; ///< Global memory bus width in bits. int streamPrioritiesSupported; // Device supports stream priority
int l2CacheSize; ///< L2 cache size. int globalL1CacheSupported; // Indicates globals are cached in L1
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes int localL1CacheSupported; // Locals are cahced in L1
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. size_t sharedMemPerMultiprocessor; // Amount of shared memory available per multiprocessor.
int streamPrioritiesSupported; ///< Device supports stream priority int regsPerMultiprocessor; // registers available per multiprocessor
int globalL1CacheSupported; ///< Indicates globals are cached in L1 int managedMemory; // Device supports allocating managed memory on this system
int localL1CacheSupported; ///< Locals are cahced in L1 int isMultiGpuBoard; // 1 if device is on a multi-GPU board, 0 if not.
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor. int multiGpuBoardGroupID; // Unique identifier for a group of devices on same multiboard GPU
int regsPerMultiprocessor; ///< registers available per multiprocessor int hostNativeAtomicSupported; // Link between host and device supports native atomics
int managedMemory; ///< Device supports allocating managed memory on this system int singleToDoublePrecisionPerfRatio; // Deprecated. CUDA only.
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. int pageableMemoryAccess; // Device supports coherently accessing pageable memory
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU // without calling hipHostRegister on it
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics int concurrentManagedAccess; // Device can coherently access managed memory concurrently with the CPU
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only. int computePreemptionSupported; // Is compute preemption supported on the device
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory int canUseHostPointerForRegisteredMem; // Device can access host registered memory with same address as the host
///< without calling hipHostRegister on it int cooperativeLaunch; // HIP device supports cooperative launch
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with int cooperativeMultiDeviceLaunch; // HIP device supports cooperative launch on multiple devices
///< the CPU size_t sharedMemPerBlockOptin; // Per device m ax shared mem per block usable by special opt in
int computePreemptionSupported; ///< Is compute preemption supported on the device int pageableMemoryAccessUsesHostPageTables; // Device accesses pageable memory via the host's page tables
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same int directManagedMemAccessFromHost; // Host can directly access managed memory on the device without migration
///< address as the host int maxBlocksPerMultiProcessor; // Max number of blocks on CU
int cooperativeLaunch; ///< HIP device supports cooperative launch int accessPolicyMaxWindowSize; // Max value of access policy window
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple size_t reservedSharedMemPerBlock; // Shared memory reserved by driver per block
///< devices int hostRegisterSupported; // Device supports hipHostRegister
size_t int sparseHipArraySupported; // Indicates if device supports sparse hip arrays
sharedMemPerBlockOptin; ///< Per device m ax shared mem per block usable by special opt in int hostRegisterReadOnlySupported; // Device supports using the hipHostRegisterReadOnly flag with hipHostRegistger
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's int timelineSemaphoreInteropSupported; // Indicates external timeline semaphore support
///< page tables int memoryPoolsSupported; // Indicates if device supports hipMallocAsync and hipMemPool APIs
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device int gpuDirectRDMASupported; // Indicates device support of RDMA APIs
///< without migration unsigned int gpuDirectRDMAFlushWritesOptions; // Bitmask to be interpreted according to hipFlushGPUDirectRDMAWritesOptions
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU int gpuDirectRDMAWritesOrdering; // value of hipGPUDirectRDMAWritesOrdering
int accessPolicyMaxWindowSize; ///< Max value of access policy window unsigned int memoryPoolSupportedHandleTypes; // Bitmask of handle types support with mempool based IPC
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block int deferredMappingHipArraySupported; // Device supports deferred mapping HIP arrays and HIP mipmapped arrays
int hostRegisterSupported; ///< Device supports hipHostRegister int ipcEventSupported; // Device supports IPC events
  int sparseHipArraySupported;                  // Indicates if device supports sparse hip arrays
  int hostRegisterReadOnlySupported;            // Device supports using the hipHostRegisterReadOnly flag with hipHostRegister
  int timelineSemaphoreInteropSupported;        // Indicates external timeline semaphore support
  int memoryPoolsSupported;                     // Indicates if device supports hipMallocAsync and hipMemPool APIs
  int gpuDirectRDMASupported;                   // Indicates device support of RDMA APIs
  unsigned int gpuDirectRDMAFlushWritesOptions; // Bitmask to be interpreted according to hipFlushGPUDirectRDMAWritesOptions
  int gpuDirectRDMAWritesOrdering;              // value of hipGPUDirectRDMAWritesOrdering
  unsigned int memoryPoolSupportedHandleTypes;  // Bitmask of handle types support with mempool based IPC
  int deferredMappingHipArraySupported;         // Device supports deferred mapping HIP arrays and HIP mipmapped arrays
  int ipcEventSupported;                        // Device supports IPC events
  int clusterLaunch;                            // Device supports cluster launch
  int unifiedFunctionPointers;                  // Indicates device supports unified function pointers
  int reserved[63];                             // CUDA Reserved.

  int hipReserved[32];                          // Reserved for adding new entries for HIP/CUDA.

  /* HIP Only struct members */
  char gcnArchName[256];                        // AMD GCN Arch Name. HIP Only.
  size_t maxSharedMemoryPerMultiProcessor;      // Maximum Shared Memory Per CU. HIP Only.
  int clockInstructionRate;                     // Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
  hipDeviceArch_t arch;                         // Architectural feature flags. New for HIP.
  unsigned int* hdpMemFlushCntl;                // Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
  unsigned int* hdpRegFlushCntl;                // Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
  int cooperativeMultiDeviceUnmatchedFunc;      // HIP device supports cooperative launch on multiple devices with unmatched functions
  int cooperativeMultiDeviceUnmatchedGridDim;   // HIP device supports cooperative launch on multiple devices with unmatched grid dimensions
  int cooperativeMultiDeviceUnmatchedBlockDim;  // HIP device supports cooperative launch on multiple devices with unmatched block dimensions
  int cooperativeMultiDeviceUnmatchedSharedMem; // HIP device supports cooperative launch on multiple devices with unmatched shared memories
  int isLargeBar;                               // 1: if it is a large PCI bar device, else 0
  int asicRevision;                             // Revision of the GPU in this device
} hipDeviceProp_t;
//Flags that can be used with hipStreamCreateWithFlags.
@@ -532,7 +494,8 @@ typedef struct hipDeviceProp_t {
#define hipDeviceMapHost 0x8
#define hipDeviceLmemResizeToMax 0x16
typedef enum hipJitOption {
typedef enum hipJitOption
{
  hipJitOptionMaxRegisters = 0,
  hipJitOptionThreadsPerBlock,
  hipJitOptionWallTime,
@@ -551,6 +514,7 @@ typedef enum hipJitOption {
  hipJitOptionSm3xOpt,
  hipJitOptionFastCompile,
  hipJitOptionNumOptions
} hipJitOption;
// stop: hip_runtime_api.h
@@ -563,11 +527,17 @@ typedef enum hipJitOption {
#define HIP_API_CALL HIPAPI
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE) (hipCtx_t *, unsigned int, hipDevice_t);
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY) (hipCtx_t);
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (hipCtx_t *);
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (hipCtx_t);
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (hipCtx_t);
// deprecated
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (void);
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, hipDeviceAttribute_t, hipDevice_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *);
@@ -575,7 +545,8 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET) (hipDevice_t *,
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, hipDevice_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, hipDevice_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *, unsigned int);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATEWITHFLAGS) (hipEvent_t *, unsigned int);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY) (hipEvent_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, hipEvent_t, hipEvent_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD) (hipEvent_t, hipStream_t);
@@ -603,7 +574,10 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (hipDeviceptr_t
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (hipModule_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION) (int *);
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *, unsigned int);
typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICE) (hipDevice_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICEFLAGS) (unsigned int);
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *);
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATEWITHFLAGS) (hipStream_t *, unsigned int);
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t);
typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES) (hipDeviceProp_t *, hipDevice_t);
@@ -613,11 +587,17 @@ typedef struct hc_hip_lib
{
  hc_dynlib_t lib;
  // deprecated
  HIP_HIPCTXCREATE hipCtxCreate;
  // deprecated
  HIP_HIPCTXDESTROY hipCtxDestroy;
  // deprecated
  HIP_HIPCTXPOPCURRENT hipCtxPopCurrent;
  // deprecated
  HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent;
  // deprecated
  HIP_HIPCTXSETCURRENT hipCtxSetCurrent;
  // deprecated
  HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize;
  HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute;
  HIP_HIPDEVICEGETCOUNT hipDeviceGetCount;
@@ -626,6 +606,7 @@ typedef struct hc_hip_lib
  HIP_HIPDEVICETOTALMEM hipDeviceTotalMem;
  HIP_HIPDRIVERGETVERSION hipDriverGetVersion;
  HIP_HIPEVENTCREATE hipEventCreate;
  HIP_HIPEVENTCREATEWITHFLAGS hipEventCreateWithFlags;
  HIP_HIPEVENTDESTROY hipEventDestroy;
  HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime;
  HIP_HIPEVENTRECORD hipEventRecord;
@@ -653,7 +634,10 @@ typedef struct hc_hip_lib
  HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx;
  HIP_HIPMODULEUNLOAD hipModuleUnload;
  HIP_HIPRUNTIMEGETVERSION hipRuntimeGetVersion;
  HIP_HIPSETDEVICE hipSetDevice;
  HIP_HIPSETDEVICEFLAGS hipSetDeviceFlags;
  HIP_HIPSTREAMCREATE hipStreamCreate;
  HIP_HIPSTREAMCREATEWITHFLAGS hipStreamCreateWithFlags;
  HIP_HIPSTREAMDESTROY hipStreamDestroy;
  HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize;
  HIP_HIPGETDEVICEPROPERTIES hipGetDeviceProperties;
@@ -666,11 +650,17 @@ typedef hc_hip_lib_t HIP_PTR;
int hip_init (void *hashcat_ctx);
void hip_close (void *hashcat_ctx);
// deprecated
int hc_hipCtxCreate (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
// deprecated
int hc_hipCtxDestroy (void *hashcat_ctx, hipCtx_t ctx);
// deprecated
int hc_hipCtxPopCurrent (void *hashcat_ctx, hipCtx_t *pctx);
// deprecated
int hc_hipCtxPushCurrent (void *hashcat_ctx, hipCtx_t ctx);
// deprecated
int hc_hipCtxSetCurrent (void *hashcat_ctx, hipCtx_t ctx);
// deprecated
int hc_hipCtxSynchronize (void *hashcat_ctx);
int hc_hipDeviceGet (void *hashcat_ctx, hipDevice_t *device, int ordinal);
int hc_hipDeviceGetAttribute (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
@@ -678,7 +668,8 @@ int hc_hipDeviceGetCount (void *hashcat_ctx, int *count);
int hc_hipDeviceGetName (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
int hc_hipDeviceTotalMem (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion);
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent);
int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent);
int hc_hipEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
int hc_hipEventQuery (void *hashcat_ctx, hipEvent_t hEvent);
@@ -705,7 +696,10 @@ int hc_hipModuleGetGlobal (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t
int hc_hipModuleLoadDataEx (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
int hc_hipModuleUnload (void *hashcat_ctx, hipModule_t hmod);
int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion);
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev);
int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags);
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream);
int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int flags);
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream);
int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream);
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
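Editor's note: to make the API shift above concrete, here is a minimal, self-contained sketch against the plain HIP runtime API (not hashcat's hc_* wrappers). The function name run_on_device is a placeholder, error handling is reduced to early returns, and the call order simply mirrors the diff.

// Old, deprecated flow (reference only):
//   hipCtx_t ctx;
//   hipCtxCreate (&ctx, hipDeviceScheduleBlockingSync, dev);
//   hipCtxPushCurrent (ctx);
//   ... work ...
//   hipCtxPopCurrent (&ctx);
//   hipCtxDestroy (ctx);

#include <hip/hip_runtime.h>

static int run_on_device (const int dev)
{
  // bind the calling thread to the device; replaces hipCtxCreate/hipCtxPushCurrent
  if (hipSetDeviceFlags (hipDeviceScheduleBlockingSync) != hipSuccess) return -1;
  if (hipSetDevice (dev) != hipSuccess) return -1;

  // streams and events are created through the *WithFlags variants
  hipStream_t stream;
  if (hipStreamCreateWithFlags (&stream, hipStreamDefault) != hipSuccess) return -1;

  hipEvent_t event;
  if (hipEventCreateWithFlags (&event, hipEventBlockingSync) != hipSuccess) return -1;

  // ... enqueue work on `stream`, record `event` ...

  hipEventDestroy (event);
  hipStreamDestroy (stream);

  return 0;
}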

View File

@@ -679,7 +679,7 @@ HC_API_CALL void *thread_autotune (void *p)
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
  }
  // check for autotune failure
@@ -695,11 +695,6 @@ HC_API_CALL void *thread_autotune (void *p)
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
  }
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
  return NULL;
}

View File

@@ -993,7 +993,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
    if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
@@ -1059,11 +1059,6 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
  }
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
}
  return 0;
}
@@ -1082,13 +1077,11 @@ int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
    if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
  }
  #if defined (__APPLE__)
@@ -1122,13 +1115,11 @@ int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
    if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
  }
  #if defined (__APPLE__)
@@ -5937,24 +5928,24 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
    device_param->has_prmt = (sm >= 20) ? true : false;
    device_param->has_shfw = (sm >= 70) ? true : false;
// one-time init cuda context
if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
    // device_available_mem
CUcontext cuda_context;
if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
    size_t free = 0;
    size_t total = 0;
@@ -5967,14 +5958,7 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
    device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
    if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1)
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1)
    {
      device_param->skipped = true;
@@ -6440,24 +6424,24 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
    device_param->has_prmt = false;
    device_param->has_shfw = true; // always reports false : prop.arch.hasFunnelShift;
// one-time init hip context
if (hc_hipSetDeviceFlags (hashcat_ctx, hipDeviceScheduleBlockingSync) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
{
device_param->skipped = true;
continue;
}
    // device_available_mem
hipCtx_t hip_context;
if (hc_hipCtxCreate (hashcat_ctx, &hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_hipCtxPushCurrent (hashcat_ctx, hip_context) == -1)
{
device_param->skipped = true;
continue;
}
    size_t free = 0;
    size_t total = 0;
@@ -6470,20 +6454,6 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
    device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
if (hc_hipCtxPopCurrent (hashcat_ctx, &hip_context) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1)
{
device_param->skipped = true;
continue;
}
    #if defined (__linux__)
    if (strchr (folder_config->cpath_real, ' ') != NULL)
    {
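Editor's note on the memory probe in the two init functions above: a simplified, self-contained sketch of the same logic against the plain HIP runtime API (not the hc_* wrappers; available_mem_for_device and keepfree_percent are placeholder names) could look like this.

#include <hip/hip_runtime.h>
#include <stdint.h>

// Returns the memory budget such code would consider usable, or -1 on error.
static int64_t available_mem_for_device (const int dev, const unsigned int keepfree_percent)
{
  // bind the device once, without creating/destroying a throwaway context
  if (hipSetDeviceFlags (hipDeviceScheduleBlockingSync) != hipSuccess) return -1;
  if (hipSetDevice (dev) != hipSuccess) return -1;

  size_t free_b  = 0;
  size_t total_b = 0;

  if (hipMemGetInfo (&free_b, &total_b) != hipSuccess) return -1;

  // same formula as above: keep a user-defined percentage of free memory in reserve
  return ((int64_t) free_b * (100 - keepfree_percent)) / 100;
}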
@@ -8623,13 +8593,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      if (device_param->skipped == true) continue;
    }
    /**
     * create command-queue
     */
    mtl_command_queue command_queue;
    if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1)
    // one-time init metal command-queue
    if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
    {
      device_param->skipped = true;
@@ -8678,11 +8644,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
        u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
        if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
      }
      device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
@@ -8707,8 +8673,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      hcfree (tmp_device);
    }
    hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue);
    if (device_param->device_host_unified_memory == 1)
    {
      // so, we actually have only half the memory because we need the same buffers on host side
@@ -8734,11 +8698,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      if (device_param->skipped == true) continue;
    }
    /**
     * create context for each device
     */
    cl_context context;
    // one-time init opencl context
    /*
    cl_context_properties properties[3];
@@ -8747,10 +8707,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
    properties[1] = (cl_context_properties) device_param->opencl_platform;
    properties[2] = 0;
    CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
    CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
    */
    if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1)
    if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
    {
      device_param->skipped = true;
@@ -8760,13 +8720,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      continue;
    }
    /**
     * create command-queue
     */
    cl_command_queue command_queue;
    if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1)
    // one-time init open command-queue
    if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
    {
      device_param->skipped = true;
@@ -8781,17 +8737,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
    {
      #define RUN_INSTRUCTION_CHECKS() \
      device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
      device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \
      if (backend_devices_idx > 0)
      {
@@ -8979,7 +8935,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
        OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
        tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
        tmp_device[c] = ocl->clCreateBuffer (device_param->opencl_context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
        if (CL_err != CL_SUCCESS)
        {
@@ -8992,11 +8948,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
        u8 tmp_host[8];
        if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueReadBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueReadBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
        if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
      }
      device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
@@ -9008,23 +8964,27 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
      // clean up
      int r = 0;
      for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
      {
        if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
        if (tmp_device[c] != NULL)
        {
          if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
          if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) r = -1;
        }
      }
      hcfree (tmp_device);
if (r == -1)
{
// return -1 here is blocking, to be better evaluated
//return -1;
}
    }
  }
  hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
  hc_clReleaseContext (hashcat_ctx, context);
  if (device_param->device_host_unified_memory == 1)
  {
@@ -9065,23 +9025,61 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
    hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]);
  }
// one-time release context/command-queue from all runtimes
  for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
  {
    hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
    hcfree (device_param->device_name);
if (device_param->is_cuda == true)
{
if (device_param->cuda_context)
{
hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
device_param->cuda_context = NULL;
}
}
if (device_param->is_hip == true)
{
hcfree (device_param->gcnArchName);
}
#if defined (__APPLE__)
if (device_param->is_metal == true)
{
if (device_param->metal_command_queue)
{
hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
device_param->metal_command_queue = NULL;
}
}
#endif
    if (device_param->is_opencl == true)
    {
      hcfree (device_param->opencl_driver_version);
      hcfree (device_param->opencl_device_version);
      hcfree (device_param->opencl_device_c_version);
      hcfree (device_param->opencl_device_vendor);
if (device_param->opencl_command_queue)
{
hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
device_param->opencl_command_queue = NULL;
      }
      if (device_param->is_hip == true)
      {
        hcfree (device_param->gcnArchName);
      if (device_param->opencl_context)
      {
        hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
        device_param->opencl_context = NULL;
      }
    }
  }
@@ -10730,93 +10728,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    }
    #endif
    /**
     * create context for each device
     */
    // re-using context/command-queue, there is no need to re-initialize them
if (device_param->is_cuda == true)
{
if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
}
if (device_param->is_hip == true)
{
if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1)
{
device_param->skipped = true;
continue;
}
}
#if defined (__APPLE__)
if (device_param->is_metal == true)
{
/**
* create command-queue
*/
if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
{
device_param->skipped = true;
continue;
}
}
#endif
if (device_param->is_opencl == true)
{
/*
cl_context_properties properties[3];
properties[0] = CL_CONTEXT_PLATFORM;
properties[1] = (cl_context_properties) device_param->opencl_platform;
properties[2] = 0;
CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
*/
if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
{
device_param->skipped = true;
continue;
}
/**
* create command-queue
*/
// not supported with NV
// device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
{
device_param->skipped = true;
continue;
}
}
    /**
     * create stream for CUDA devices
@@ -10824,6 +10736,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    if (device_param->is_cuda == true)
    {
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
{
device_param->skipped = true;
continue;
}
      if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1)
      {
        device_param->skipped = true;
@@ -10838,7 +10757,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    if (device_param->is_hip == true)
    {
      if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
      if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
{
device_param->skipped = true;
continue;
}
if (hc_hipStreamCreateWithFlags (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
      {
        device_param->skipped = true;
@@ -10880,21 +10806,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    if (device_param->is_hip == true)
    {
      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
      {
        device_param->skipped = true;
        continue;
      }
      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
      {
        device_param->skipped = true;
        continue;
      }
      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
      {
        device_param->skipped = true;
@@ -16927,16 +16853,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
    }
  }
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1)
{
device_param->skipped = true;
continue;
}
}
    hardware_power_all += hardware_power_max;
    EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
@@ -17058,7 +16974,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      if (device_param->cuda_module_amp) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp);
      if (device_param->cuda_module_shared) hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_shared);
      if (device_param->cuda_context) hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
      //if (device_param->cuda_context) hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
      device_param->cuda_d_pws_buf = 0;
      device_param->cuda_d_pws_amp_buf = 0;
@@ -17135,7 +17051,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      device_param->cuda_module_amp = NULL;
      device_param->cuda_module_shared = NULL;
      device_param->cuda_context = NULL;
      //device_param->cuda_context = NULL;
    }
    if (device_param->is_hip == true)
@@ -17189,8 +17105,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      if (device_param->hip_module_amp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp);
      if (device_param->hip_module_shared) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared);
      if (device_param->hip_context) hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context);
      device_param->hip_d_pws_buf = 0;
      device_param->hip_d_pws_amp_buf = 0;
      device_param->hip_d_pws_comp_buf = 0;
@@ -17265,8 +17179,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      device_param->hip_module_mp = NULL;
      device_param->hip_module_amp = NULL;
      device_param->hip_module_shared = NULL;
device_param->hip_context = NULL;
    }
    #if defined (__APPLE__)
@@ -17341,7 +17253,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      if (device_param->metal_library_amp) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp);
      if (device_param->metal_library_shared) hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared);
      if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
      //if (device_param->metal_command_queue) hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
      //if (device_param->metal_device) hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device);
@@ -17411,7 +17323,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      device_param->metal_library_mp = NULL;
      device_param->metal_library_amp = NULL;
      device_param->metal_library_shared = NULL;
      device_param->metal_command_queue = NULL;
      //device_param->metal_command_queue = NULL;
      //device_param->metal_device = NULL;
    }
    #endif // __APPLE__
@@ -17487,9 +17399,9 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      if (device_param->opencl_program_amp) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_amp);
      if (device_param->opencl_program_shared) hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_shared);
      if (device_param->opencl_command_queue) hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
      //if (device_param->opencl_command_queue) hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
      if (device_param->opencl_context) hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
      //if (device_param->opencl_context) hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
      device_param->opencl_d_pws_buf = NULL;
      device_param->opencl_d_pws_amp_buf = NULL;
@@ -17557,8 +17469,8 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
      device_param->opencl_program_mp = NULL;
      device_param->opencl_program_amp = NULL;
      device_param->opencl_program_shared = NULL;
      device_param->opencl_command_queue = NULL;
      //device_param->opencl_command_queue = NULL;
      device_param->opencl_context = NULL;
      //device_param->opencl_context = NULL;
    }
    device_param->h_tmps = NULL;

View File

@@ -381,7 +381,7 @@ HC_API_CALL void *thread_calc_stdin (void *p)
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
  }
  if (calc_stdin (hashcat_ctx, device_param) == -1)
@@ -396,11 +396,6 @@ HC_API_CALL void *thread_calc_stdin (void *p)
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
  }
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
  if (bridge_ctx->enabled == true)
  {
    if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
@@ -1685,7 +1680,7 @@ HC_API_CALL void *thread_calc (void *p)
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
  }
  if (calc (hashcat_ctx, device_param) == -1)
@@ -1700,11 +1695,6 @@ HC_API_CALL void *thread_calc (void *p)
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
  }
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
  if (bridge_ctx->enabled == true)
  {
    if (bridge_ctx->thread_term != BRIDGE_DEFAULT)

View File

@@ -127,7 +127,8 @@ int hip_init (void *hashcat_ctx)
  HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreate, HIP_HIPEVENTCREATE, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventCreateWithFlags, hipEventCreateWithFlags, HIP_HIPEVENTCREATEWITHFLAGS, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
@@ -155,7 +156,10 @@ int hip_init (void *hashcat_ctx)
  HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion, hipRuntimeGetVersion, HIP_HIPRUNTIMEGETVERSION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipSetDevice, hipSetDevice, HIP_HIPSETDEVICE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipSetDeviceFlags, hipSetDeviceFlags, HIP_HIPSETDEVICEFLAGS, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamCreateWithFlags, hipStreamCreateWithFlags, HIP_HIPSTREAMCREATEWITHFLAGS, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
  HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
@@ -507,13 +511,13 @@ int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion)
  return 0;
}
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags)
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
  const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags);
  const hipError_t HIP_err = hip->hipEventCreate (phEvent);
  if (HIP_err != hipSuccess)
  {
@@ -534,6 +538,33 @@ int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flag
  return 0;
}
int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipEventCreateWithFlags (phEvent, flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
@@ -1211,13 +1242,67 @@ int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion)
  return 0;
}
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
  const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags);
  const hipError_t HIP_err = hip->hipSetDevice (dev);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipSetDevice(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipSetDevice(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipSetDeviceFlags (flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipStreamCreate (phStream);
  if (HIP_err != hipSuccess)
  {
@@ -1238,6 +1323,33 @@ int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int F
  return 0;
}
int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipStreamCreateWithFlags (phStream, Flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
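Editor's note: a condensed usage sketch of the new wrappers, mirroring the call order used in backend_session_begin above. It is illustrative only, not a verbatim hashcat call site; the function name hip_device_setup is a placeholder and the surrounding loop, event logging and device_param bookkeeping of the real code are omitted.

// Illustrative per-device setup with the new HIP wrappers.
static int hip_device_setup (void *hashcat_ctx, hc_device_param_t *device_param)
{
  // bind the calling thread to the device; replaces hipCtxCreate/hipCtxPushCurrent
  if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;

  // streams and events are now created through the *WithFlags entry points
  if (hc_hipStreamCreateWithFlags (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) return -1;

  if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) return -1;
  if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) return -1;
  if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1) return -1;

  return 0;
}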
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;

View File

@@ -1272,7 +1272,7 @@ HC_API_CALL void *thread_selftest (void *p)
  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
  }
  const int rc_selftest = process_selftest (hashcat_ctx, device_param);
@@ -1303,8 +1303,6 @@ HC_API_CALL void *thread_selftest (void *p)
  if (device_param->is_hip == true)
  {
    if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return NULL;
    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
  }
  if (bridge_ctx->enabled == true)