mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-06 06:42:35 +00:00

- Integrated occupancy hints from vendor APIs (CUDA, HIP) to set a dynamic threads-per-block limit per kernel instead of using static values. - Added `find_tuning_function()` to identify the relevant kernel. - Autotuner now runs in three stages: threads -> loops -> accel. The first two stages now stop increasing when the tested kernel runtime gets too close to the target runtime (96ms for `-w 3`), leaving headroom for the next stage to adjust in a finer sense. - Accel tuning now uses a capped floating-point multiplier instead of powers of two. - Removed workarounds for missing thread autotuning in plugins. - Removed the hardcoded 4GiB host memory limit for accel. Added a cross-platform `get_free_memory()` to check actual free RAM during GPU initialization, preventing underutilization of high-end GPUs like the 4090. If needed, users can still cap memory usage with `-T` or `-n`. - Updated enums for ROCm 6.4.x and CUDA 12.9. - Added code to detect kernel register spilling. That's relevant so we can keep free enough global memory on the runtime for the runtime to handle spills efficiently.
700 lines
47 KiB
C
700 lines
47 KiB
C
/**
|
|
* Author......: See docs/credits.txt
|
|
* License.....: MIT
|
|
*/
|
|
|
|
#ifndef HC_EXT_HIP_H
|
|
#define HC_EXT_HIP_H
|
|
|
|
// The general Idea with HIP is to use it for AMD GPU since we use CUDA for NV
|
|
// Therefore, we need to take certain items, such as hipDeviceptr_t from driver specific paths like amd_driver_types.h
|
|
// We just need to keep this in mind in case we need to update these constants from future SDK versions
|
|
|
|
// start: driver_types.h
|
|
|
|
typedef void* hipDeviceptr_t;
|
|
|
|
typedef enum hipFunction_attribute {
|
|
HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, ///< The maximum number of threads per block. Depends on function and device.
|
|
HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, ///< The statically allocated shared memory size in bytes per block required by the function.
|
|
HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, ///< The user-allocated constant memory by the function in bytes.
|
|
HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, ///< The local memory usage of each thread by this function in bytes.
|
|
HIP_FUNC_ATTRIBUTE_NUM_REGS, ///< The number of registers used by each thread of this function.
|
|
HIP_FUNC_ATTRIBUTE_PTX_VERSION, ///< PTX version
|
|
HIP_FUNC_ATTRIBUTE_BINARY_VERSION, ///< Binary version
|
|
HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, ///< Cache mode
|
|
HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, ///< The maximum dynamic shared memory per block for this function in bytes.
|
|
HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference in percent of the maximum shared memory.
|
|
HIP_FUNC_ATTRIBUTE_MAX
|
|
} hipFunction_attribute;
|
|
|
|
// stop: driver_types.h
|
|
|
|
// start: hip_runtime_api.h
|
|
|
|
typedef int hipDevice_t;
|
|
typedef struct ihipCtx_t* hipCtx_t;
|
|
typedef struct ihipEvent_t* hipEvent_t;
|
|
typedef struct ihipStream_t* hipStream_t;
|
|
typedef struct ihipModule_t* hipModule_t;
|
|
typedef struct ihipModuleSymbol_t* hipFunction_t;
|
|
|
|
// Ignoring error-code return values from hip APIs is discouraged. On C++17,
|
|
// we can make that yield a warning
|
|
#if __cplusplus >= 201703L
|
|
#define __HIP_NODISCARD [[nodiscard]]
|
|
#else
|
|
#define __HIP_NODISCARD
|
|
#endif
|
|
|
|
typedef enum __HIP_NODISCARD hipError_t {
|
|
hipSuccess = 0, ///< Successful completion.
|
|
hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL
|
|
///< or not in an acceptable range.
|
|
hipErrorOutOfMemory = 2,
|
|
// Deprecated
|
|
hipErrorMemoryAllocation = 2, ///< Memory allocation error.
|
|
hipErrorNotInitialized = 3,
|
|
// Deprecated
|
|
hipErrorInitializationError = 3,
|
|
hipErrorDeinitialized = 4,
|
|
hipErrorProfilerDisabled = 5,
|
|
hipErrorProfilerNotInitialized = 6,
|
|
hipErrorProfilerAlreadyStarted = 7,
|
|
hipErrorProfilerAlreadyStopped = 8,
|
|
hipErrorInvalidConfiguration = 9,
|
|
hipErrorInvalidPitchValue = 12,
|
|
hipErrorInvalidSymbol = 13,
|
|
hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer
|
|
hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction
|
|
hipErrorInsufficientDriver = 35,
|
|
hipErrorMissingConfiguration = 52,
|
|
hipErrorPriorLaunchFailure = 53,
|
|
hipErrorInvalidDeviceFunction = 98,
|
|
hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices
|
|
hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices.
|
|
hipErrorInvalidImage = 200,
|
|
hipErrorInvalidContext = 201, ///< Produced when input context is invalid.
|
|
hipErrorContextAlreadyCurrent = 202,
|
|
hipErrorMapFailed = 205,
|
|
// Deprecated
|
|
hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr.
|
|
hipErrorUnmapFailed = 206,
|
|
hipErrorArrayIsMapped = 207,
|
|
hipErrorAlreadyMapped = 208,
|
|
hipErrorNoBinaryForGpu = 209,
|
|
hipErrorAlreadyAcquired = 210,
|
|
hipErrorNotMapped = 211,
|
|
hipErrorNotMappedAsArray = 212,
|
|
hipErrorNotMappedAsPointer = 213,
|
|
hipErrorECCNotCorrectable = 214,
|
|
hipErrorUnsupportedLimit = 215,
|
|
hipErrorContextAlreadyInUse = 216,
|
|
hipErrorPeerAccessUnsupported = 217,
|
|
hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
|
|
hipErrorInvalidGraphicsContext = 219,
|
|
hipErrorInvalidSource = 300,
|
|
hipErrorFileNotFound = 301,
|
|
hipErrorSharedObjectSymbolNotFound = 302,
|
|
hipErrorSharedObjectInitFailed = 303,
|
|
hipErrorOperatingSystem = 304,
|
|
hipErrorInvalidHandle = 400,
|
|
// Deprecated
|
|
hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid.
|
|
hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation.
|
|
hipErrorNotFound = 500,
|
|
hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not
|
|
///< ready. This is not actually an error, but is used to distinguish
|
|
///< from hipSuccess (which indicates completion). APIs that return
|
|
///< this error include hipEventQuery and hipStreamQuery.
|
|
hipErrorIllegalAddress = 700,
|
|
hipErrorLaunchOutOfResources = 701, ///< Out of resources error.
|
|
hipErrorLaunchTimeOut = 702,
|
|
hipErrorPeerAccessAlreadyEnabled =
|
|
704, ///< Peer access was already enabled from the current device.
|
|
hipErrorPeerAccessNotEnabled =
|
|
705, ///< Peer access was never enabled from the current device.
|
|
hipErrorSetOnActiveProcess = 708,
|
|
hipErrorContextIsDestroyed = 709,
|
|
hipErrorAssert = 710, ///< Produced when the kernel calls assert.
|
|
hipErrorHostMemoryAlreadyRegistered =
|
|
712, ///< Produced when trying to lock a page-locked memory.
|
|
hipErrorHostMemoryNotRegistered =
|
|
713, ///< Produced when trying to unlock a non-page-locked memory.
|
|
hipErrorLaunchFailure =
|
|
719, ///< An exception occurred on the device while executing a kernel.
|
|
hipErrorCooperativeLaunchTooLarge =
|
|
720, ///< This error indicates that the number of blocks launched per grid for a kernel
|
|
///< that was launched via cooperative launch APIs exceeds the maximum number of
|
|
///< allowed blocks for the current device
|
|
hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented
|
|
hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream
|
|
///< is capturing.
|
|
hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream
|
|
///< has been invalidated due to a previous error.
|
|
hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of
|
|
///< two independent capture sequences.
|
|
hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream.
|
|
hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not
|
|
///< joined to the primary stream.
|
|
hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses
|
|
///< the capture sequence boundary. Only implicit
|
|
///< in-stream ordering dependencies are allowed
|
|
///< to cross the boundary
|
|
hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed
|
|
///< implicit dependency on a current capture sequence
|
|
///< from hipStreamLegacy.
|
|
hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last
|
|
///< recorded in a capturing stream.
|
|
hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with
|
|
///< the hipStreamCaptureModeRelaxed argument to
|
|
///< hipStreamBeginCapture was passed to
|
|
///< hipStreamEndCapture in a different thread.
|
|
hipErrorGraphExecUpdateFailure = 910, ///< This error indicates that the graph update
|
|
///< not performed because it included changes which
|
|
///< violated constraints specific to instantiated graph
|
|
///< update.
|
|
hipErrorUnknown = 999, //< Unknown error.
|
|
// HSA Runtime Error Codes start here.
|
|
hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen
|
|
///< in production systems.
|
|
hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically
|
|
///< not seen in production systems.
|
|
hipErrorTbd ///< Marker that more error codes are needed.
|
|
} hipError_t;
|
|
|
|
#undef __HIP_NODISCARD
|
|
|
|
|
|
/**
|
|
* hipDeviceAttribute_t
|
|
* hipDeviceAttributeUnused number: 5
|
|
*/
|
|
typedef enum hipDeviceAttribute_t {
|
|
hipDeviceAttributeCudaCompatibleBegin = 0,
|
|
|
|
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
|
|
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
|
|
hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number.
|
|
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
|
|
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Device can access host registered memory
|
|
///< at the same virtual address as the CPU
|
|
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
|
|
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
|
|
hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption.
|
|
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
|
|
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
|
|
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
|
|
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
|
|
hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel.
|
|
///< Deprecated. Use instead asyncEngineCount.
|
|
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
|
|
///< the device without migration
|
|
hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1
|
|
hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports native atomic operations
|
|
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
|
|
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
|
|
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
|
|
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
|
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
|
|
hipDeviceAttributeLuid, ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
|
hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and non-Windows platforms
|
|
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
|
|
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
|
|
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor
|
|
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
|
|
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
|
|
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
|
|
hipDeviceAttributeMaxGridDimX, ///< Max grid size in width.
|
|
hipDeviceAttributeMaxGridDimY, ///< Max grid size in height.
|
|
hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth.
|
|
hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface.
|
|
hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface.
|
|
hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface.
|
|
hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface.
|
|
hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface.
|
|
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
|
|
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
|
|
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
|
|
hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture.
|
|
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
|
|
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
|
|
hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture.
|
|
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
|
|
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
|
|
hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations performed.
|
|
hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture.
|
|
hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
|
hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture.
|
|
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
|
|
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
|
|
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
|
|
hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture.
|
|
hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture
|
|
hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
|
|
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
|
|
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
|
|
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
|
|
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
|
|
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
|
|
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
|
|
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
|
|
hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU board
|
|
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
|
|
hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName
|
|
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
|
|
///< without calling hipHostRegister on it
|
|
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
|
|
hipDeviceAttributePciBusId, ///< PCI Bus ID.
|
|
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
|
|
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
|
|
hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes
|
|
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
|
|
///< by all thread blocks simultaneously resident on a multiprocessor.
|
|
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
|
|
hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per block.
|
|
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
|
|
hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special opt in.
|
|
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor.
|
|
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
|
|
hipDeviceAttributeStreamPrioritiesSupported, ///< Whether to support stream priorities.
|
|
hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces
|
|
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
|
|
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
|
|
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
|
|
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
|
|
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
|
|
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
|
|
hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid
|
|
hipDeviceAttributeWarpSize, ///< Warp size in threads.
|
|
hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator
|
|
hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
|
|
hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via hipHostRegister
|
|
hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream Ordered Memory Allocator
|
|
|
|
hipDeviceAttributeCudaCompatibleEnd = 9999,
|
|
hipDeviceAttributeAmdSpecificBegin = 10000,
|
|
|
|
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
|
|
hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch
|
|
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
|
|
hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch
|
|
hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName
|
|
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
|
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
|
|
///< devices with unmatched functions
|
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
|
|
///< devices with unmatched grid dimensions
|
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
|
|
///< devices with unmatched block dimensions
|
|
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
|
|
///< devices with unmatched shared memories
|
|
hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar
|
|
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
|
|
hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and
|
|
///< hipStreamWaitValue64(), '0' otherwise.
|
|
hipDeviceAttributeImageSupport, ///< '1' if Device supports image, '0' otherwise.
|
|
hipDeviceAttributePhysicalMultiProcessorCount, ///< All available physical compute
|
|
///< units for the device
|
|
hipDeviceAttributeFineGrainSupport, ///< '1' if Device supports fine grain, '0' otherwise
|
|
hipDeviceAttributeWallClockRate, ///< Constant frequency of wall clock in kilohertz.
|
|
|
|
hipDeviceAttributeAmdSpecificEnd = 19999,
|
|
hipDeviceAttributeVendorSpecificBegin = 20000,
|
|
// Extended attributes for vendors
|
|
} hipDeviceAttribute_t;
|
|
|
|
/**
|
|
* hipDeviceArch_t
|
|
*
|
|
*/
|
|
typedef struct {
|
|
// 32-bit Atomics
|
|
unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
|
|
unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
|
|
unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
|
|
unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
|
|
unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
|
|
|
|
// 64-bit Atomics
|
|
unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
|
|
unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
|
|
|
|
// Doubles
|
|
unsigned hasDoubles : 1; ///< Double-precision floating point.
|
|
|
|
// Warp cross-lane operations
|
|
unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
|
|
unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
|
|
unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
|
|
unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
|
|
|
|
// Sync
|
|
unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
|
|
unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or.
|
|
|
|
// Misc
|
|
unsigned hasSurfaceFuncs : 1; ///< Surface functions.
|
|
unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
|
|
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
|
|
} hipDeviceArch_t;
|
|
|
|
typedef struct hipUUID_t {
|
|
char bytes[16];
|
|
} hipUUID;
|
|
|
|
|
|
/**
|
|
* hipDeviceProp
|
|
*
|
|
*/
|
|
typedef struct hipDeviceProp_t {
|
|
char name[256]; ///< Device name.
|
|
hipUUID uuid; ///< UUID of a device
|
|
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
|
|
unsigned int luidDeviceNodeMask; ///< LUID node mask
|
|
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
|
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
|
|
int regsPerBlock; ///< Registers per block.
|
|
int warpSize; ///< Warp size.
|
|
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
|
|
///< pitched memory
|
|
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
|
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
|
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
|
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
|
size_t totalConstMem; ///< Size of shared constant memory region on the device
|
|
///< (in bytes).
|
|
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
|
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
|
///< feature caps.
|
|
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
|
|
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
|
///< feature caps.
|
|
size_t textureAlignment; ///< Alignment requirement for textures
|
|
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
|
|
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead
|
|
int multiProcessorCount; ///< Number of multi-processors (compute units).
|
|
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
|
|
int integrated; ///< APU vs dGPU
|
|
int canMapHostMemory; ///< Check whether HIP can map host memory
|
|
int computeMode; ///< Compute mode.
|
|
int maxTexture1D; ///< Maximum number of elements in 1D images
|
|
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size
|
|
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
|
|
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
|
|
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images
|
|
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory
|
|
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed
|
|
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
|
|
///< elements
|
|
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims
|
|
int maxTextureCubemap; ///< Maximum cubemap texture dims
|
|
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images
|
|
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images
|
|
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims
|
|
int maxSurface1D; ///< Maximum 1D surface size
|
|
int maxSurface2D[2]; ///< Maximum 2D surface size
|
|
int maxSurface3D[3]; ///< Maximum 3D surface size
|
|
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size
|
|
int maxSurface2DLayered[3]; ///< Maximum 2D layared surface size
|
|
int maxSurfaceCubemap; ///< Maximum cubemap surface size
|
|
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size
|
|
size_t surfaceAlignment; ///< Alignment requirement for surface
|
|
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
|
int ECCEnabled; ///< Device has ECC support enabled
|
|
int pciBusID; ///< PCI Bus ID.
|
|
int pciDeviceID; ///< PCI Device ID.
|
|
int pciDomainID; ///< PCI Domain ID
|
|
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
|
int asyncEngineCount; ///< Number of async engines
|
|
int unifiedAddressing; ///< Does device and host share unified address space
|
|
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
|
int memoryBusWidth; ///< Global memory bus width in bits.
|
|
int l2CacheSize; ///< L2 cache size.
|
|
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes
|
|
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
|
|
int streamPrioritiesSupported; ///< Device supports stream priority
|
|
int globalL1CacheSupported; ///< Indicates globals are cached in L1
|
|
int localL1CacheSupported; ///< Locals are cahced in L1
|
|
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor.
|
|
int regsPerMultiprocessor; ///< registers available per multiprocessor
|
|
int managedMemory; ///< Device supports allocating managed memory on this system
|
|
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
|
|
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU
|
|
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics
|
|
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
|
|
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
|
|
///< without calling hipHostRegister on it
|
|
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
|
|
///< the CPU
|
|
int computePreemptionSupported; ///< Is compute preemption supported on the device
|
|
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same
|
|
///< address as the host
|
|
int cooperativeLaunch; ///< HIP device supports cooperative launch
|
|
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
|
|
///< devices
|
|
size_t
|
|
sharedMemPerBlockOptin; ///< Per device m ax shared mem per block usable by special opt in
|
|
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
|
|
///< page tables
|
|
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
|
|
///< without migration
|
|
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU
|
|
int accessPolicyMaxWindowSize; ///< Max value of access policy window
|
|
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block
|
|
int hostRegisterSupported; ///< Device supports hipHostRegister
|
|
int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays
|
|
int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag
|
|
///< with hipHostRegistger
|
|
int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support
|
|
int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
|
|
int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs
|
|
unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to
|
|
///< hipFlushGPUDirectRDMAWritesOptions
|
|
int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering
|
|
unsigned int
|
|
memoryPoolSupportedHandleTypes; ///< Bitmask of handle types support with mempool based IPC
|
|
int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP
|
|
///< mipmapped arrays
|
|
int ipcEventSupported; ///< Device supports IPC events
|
|
int clusterLaunch; ///< Device supports cluster launch
|
|
int unifiedFunctionPointers; ///< Indicates device supports unified function pointers
|
|
int reserved[63]; ///< CUDA Reserved.
|
|
|
|
int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA.
|
|
|
|
/* HIP Only struct members */
|
|
char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only.
|
|
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only.
|
|
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
|
|
///< instructions. New for HIP.
|
|
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
|
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
|
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
|
|
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
|
|
///< multiple
|
|
/// devices with unmatched functions
|
|
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
|
|
///< multiple
|
|
/// devices with unmatched grid dimensions
|
|
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
|
|
///< multiple
|
|
/// devices with unmatched block dimensions
|
|
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
|
|
///< multiple
|
|
/// devices with unmatched shared memories
|
|
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
|
|
int asicRevision; ///< Revision of the GPU in this device
|
|
} hipDeviceProp_t;
|
|
|
|
//Flags that can be used with hipStreamCreateWithFlags.
|
|
/** Default stream creation flags. These are used with hipStreamCreate().*/
|
|
#define hipStreamDefault 0x00
|
|
|
|
/** Stream does not implicitly synchronize with null stream.*/
|
|
#define hipStreamNonBlocking 0x01
|
|
|
|
//Flags that can be used with hipEventCreateWithFlags.
|
|
/** Default flags.*/
|
|
#define hipEventDefault 0x0
|
|
|
|
/** Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.*/
|
|
#define hipEventBlockingSync 0x1
|
|
|
|
/** Disable event's capability to record timing information. May improve performance.*/
|
|
#define hipEventDisableTiming 0x2
|
|
|
|
/** Event can support IPC. Warning: It is not supported in HIP.*/
|
|
#define hipEventInterprocess 0x4
|
|
|
|
/** Use a device-scope release when recording this event. This flag is useful to obtain more
|
|
* precise timings of commands between events. The flag is a no-op on CUDA platforms.*/
|
|
#define hipEventReleaseToDevice 0x40000000
|
|
|
|
/** Use a system-scope release when recording this event. This flag is useful to make
|
|
* non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms.*/
|
|
#define hipEventReleaseToSystem 0x80000000
|
|
|
|
/** Automatically select between Spin and Yield.*/
|
|
#define hipDeviceScheduleAuto 0x0
|
|
|
|
/** Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may
|
|
* consume more power.*/
|
|
#define hipDeviceScheduleSpin 0x1
|
|
|
|
/** Yield the CPU to the operating system when waiting. May increase latency, but lowers power
|
|
* and is friendlier to other threads in the system.*/
|
|
#define hipDeviceScheduleYield 0x2
|
|
#define hipDeviceScheduleBlockingSync 0x4
|
|
#define hipDeviceScheduleMask 0x7
|
|
#define hipDeviceMapHost 0x8
|
|
#define hipDeviceLmemResizeToMax 0x16
|
|
|
|
typedef enum hipJitOption {
|
|
hipJitOptionMaxRegisters = 0,
|
|
hipJitOptionThreadsPerBlock,
|
|
hipJitOptionWallTime,
|
|
hipJitOptionInfoLogBuffer,
|
|
hipJitOptionInfoLogBufferSizeBytes,
|
|
hipJitOptionErrorLogBuffer,
|
|
hipJitOptionErrorLogBufferSizeBytes,
|
|
hipJitOptionOptimizationLevel,
|
|
hipJitOptionTargetFromContext,
|
|
hipJitOptionTarget,
|
|
hipJitOptionFallbackStrategy,
|
|
hipJitOptionGenerateDebugInfo,
|
|
hipJitOptionLogVerbose,
|
|
hipJitOptionGenerateLineInfo,
|
|
hipJitOptionCacheMode,
|
|
hipJitOptionSm3xOpt,
|
|
hipJitOptionFastCompile,
|
|
hipJitOptionNumOptions
|
|
} hipJitOption;
|
|
|
|
// stop: hip_runtime_api.h
|
|
|
|
#ifdef _WIN32
|
|
#define HIPAPI __stdcall
|
|
#else
|
|
#define HIPAPI
|
|
#endif
|
|
|
|
#define HIP_API_CALL HIPAPI
|
|
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE) (hipCtx_t *, unsigned int, hipDevice_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY) (hipCtx_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (hipCtx_t *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (hipCtx_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (hipCtx_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (void);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, hipDeviceAttribute_t, hipDevice_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET) (hipDevice_t *, int);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, hipDevice_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, hipDevice_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *, unsigned int);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY) (hipEvent_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, hipEvent_t, hipEvent_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD) (hipEvent_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE) (hipEvent_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE) (int *, hipFunction_attribute, hipFunction_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORNAME) (hipError_t, const char **);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORSTRING) (hipError_t, const char **);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPINIT) (unsigned int);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (hipFunction_t, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, hipStream_t, void **, void **);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMALLOC) (hipDeviceptr_t *, size_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMFREE) (hipDeviceptr_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC) (hipDeviceptr_t, hipDeviceptr_t, size_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC) (void *, hipDeviceptr_t, size_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (hipDeviceptr_t, const void *, size_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMSETD32ASYNC) (hipDeviceptr_t, unsigned int, size_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMEMSETD8ASYNC) (hipDeviceptr_t, unsigned char, size_t, hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION) (hipFunction_t *, hipModule_t, const char *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (hipDeviceptr_t *, size_t *, hipModule_t, const char *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (hipModule_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION) (int *);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *, unsigned int);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES) (hipDeviceProp_t *, hipDevice_t);
|
|
typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR) (int *, hipFunction_t, int, size_t);
|
|
|
|
typedef struct hc_hip_lib
|
|
{
|
|
hc_dynlib_t lib;
|
|
|
|
HIP_HIPCTXCREATE hipCtxCreate;
|
|
HIP_HIPCTXDESTROY hipCtxDestroy;
|
|
HIP_HIPCTXPOPCURRENT hipCtxPopCurrent;
|
|
HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent;
|
|
HIP_HIPCTXSETCURRENT hipCtxSetCurrent;
|
|
HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize;
|
|
HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute;
|
|
HIP_HIPDEVICEGETCOUNT hipDeviceGetCount;
|
|
HIP_HIPDEVICEGET hipDeviceGet;
|
|
HIP_HIPDEVICEGETNAME hipDeviceGetName;
|
|
HIP_HIPDEVICETOTALMEM hipDeviceTotalMem;
|
|
HIP_HIPDRIVERGETVERSION hipDriverGetVersion;
|
|
HIP_HIPEVENTCREATE hipEventCreate;
|
|
HIP_HIPEVENTDESTROY hipEventDestroy;
|
|
HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime;
|
|
HIP_HIPEVENTRECORD hipEventRecord;
|
|
HIP_HIPEVENTSYNCHRONIZE hipEventSynchronize;
|
|
HIP_HIPFUNCGETATTRIBUTE hipFuncGetAttribute;
|
|
HIP_HIPGETERRORNAME hipGetErrorName;
|
|
HIP_HIPGETERRORSTRING hipGetErrorString;
|
|
HIP_HIPINIT hipInit;
|
|
HIP_HIPLAUNCHKERNEL hipLaunchKernel;
|
|
HIP_HIPMEMALLOC hipMemAlloc;
|
|
HIP_HIPMEMFREE hipMemFree;
|
|
HIP_HIPMEMGETINFO hipMemGetInfo;
|
|
HIP_HIPMEMCPYDTODASYNC hipMemcpyDtoDAsync;
|
|
HIP_HIPMEMCPYDTOHASYNC hipMemcpyDtoHAsync;
|
|
HIP_HIPMEMCPYHTODASYNC hipMemcpyHtoDAsync;
|
|
HIP_HIPMEMSETD32ASYNC hipMemsetD32Async;
|
|
HIP_HIPMEMSETD8ASYNC hipMemsetD8Async;
|
|
HIP_HIPMODULEGETFUNCTION hipModuleGetFunction;
|
|
HIP_HIPMODULEGETGLOBAL hipModuleGetGlobal;
|
|
HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx;
|
|
HIP_HIPMODULEUNLOAD hipModuleUnload;
|
|
HIP_HIPRUNTIMEGETVERSION hipRuntimeGetVersion;
|
|
HIP_HIPSTREAMCREATE hipStreamCreate;
|
|
HIP_HIPSTREAMDESTROY hipStreamDestroy;
|
|
HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize;
|
|
HIP_HIPGETDEVICEPROPERTIES hipGetDeviceProperties;
|
|
HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
|
|
|
|
} hc_hip_lib_t;
|
|
|
|
typedef hc_hip_lib_t HIP_PTR;
|
|
|
|
int hip_init (void *hashcat_ctx);
|
|
void hip_close (void *hashcat_ctx);
|
|
|
|
int hc_hipCtxCreate (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
|
|
int hc_hipCtxDestroy (void *hashcat_ctx, hipCtx_t ctx);
|
|
int hc_hipCtxPopCurrent (void *hashcat_ctx, hipCtx_t *pctx);
|
|
int hc_hipCtxPushCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
|
int hc_hipCtxSetCurrent (void *hashcat_ctx, hipCtx_t ctx);
|
|
int hc_hipCtxSynchronize (void *hashcat_ctx);
|
|
int hc_hipDeviceGet (void *hashcat_ctx, hipDevice_t *device, int ordinal);
|
|
int hc_hipDeviceGetAttribute (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
|
int hc_hipDeviceGetCount (void *hashcat_ctx, int *count);
|
|
int hc_hipDeviceGetName (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
|
|
int hc_hipDeviceTotalMem (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
|
|
int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion);
|
|
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
|
|
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent);
|
|
int hc_hipEventElapsedTime (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
|
|
int hc_hipEventQuery (void *hashcat_ctx, hipEvent_t hEvent);
|
|
int hc_hipEventRecord (void *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream);
|
|
int hc_hipEventSynchronize (void *hashcat_ctx, hipEvent_t hEvent);
|
|
int hc_hipFuncGetAttribute (void *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc);
|
|
int hc_hipInit (void *hashcat_ctx, unsigned int Flags);
|
|
int hc_hipLaunchKernel (void *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
|
|
int hc_hipMemAlloc (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
|
|
int hc_hipMemFree (void *hashcat_ctx, hipDeviceptr_t dptr);
|
|
int hc_hipMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total);
|
|
int hc_hipMemcpyDtoDAsync (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
|
|
int hc_hipMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
|
|
int hc_hipMemcpyHtoDAsync (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
|
|
int hc_hipMemsetD32Async (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N, hipStream_t hStream);
|
|
int hc_hipMemsetD8Async (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N, hipStream_t hStream);
|
|
int hc_hipModuleGetFunction (void *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name);
|
|
int hc_hipModuleGetGlobal (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
|
|
int hc_hipModuleLoadDataEx (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
|
|
int hc_hipModuleUnload (void *hashcat_ctx, hipModule_t hmod);
|
|
int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion);
|
|
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
|
|
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream);
|
|
int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream);
|
|
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
|
|
int hc_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor (void *hashcat_ctx, int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
|
|
|
|
#endif // HC_EXT_HIP_H
|