/** * Author......: See docs/credits.txt * License.....: MIT */ #ifndef _EXT_HIP_H #define _EXT_HIP_H // The general Idea with HIP is to use it for AMD GPU since we use CUDA for NV // Therefore, we need to take certain items, such as hipDeviceptr_t from driver specific paths like amd_driver_types.h // We just need to keep this in mind in case we need to update these constants from future SDK versions // start: amd_driver_types.h typedef void* hipDeviceptr_t; typedef enum hipFunction_attribute { HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, HIP_FUNC_ATTRIBUTE_NUM_REGS, HIP_FUNC_ATTRIBUTE_PTX_VERSION, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, HIP_FUNC_ATTRIBUTE_MAX }hipFunction_attribute; // stop: amd_driver_types.h // start: hip_runtime_api.h typedef int hipDevice_t; typedef struct ihipCtx_t* hipCtx_t; typedef struct ihipEvent_t* hipEvent_t; typedef struct ihipStream_t* hipStream_t; typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; // Ignoring error-code return values from hip APIs is discouraged. On C++17, // we can make that yield a warning #if __cplusplus >= 201703L #define __HIP_NODISCARD [[nodiscard]] #else #define __HIP_NODISCARD #endif typedef enum __HIP_NODISCARD hipError_t { hipSuccess = 0, ///< Successful completion. hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL ///< or not in an acceptable range. hipErrorOutOfMemory = 2, // Deprecated hipErrorMemoryAllocation = 2, ///< Memory allocation error. hipErrorNotInitialized = 3, // Deprecated hipErrorInitializationError = 3, hipErrorDeinitialized = 4, hipErrorProfilerDisabled = 5, hipErrorProfilerNotInitialized = 6, hipErrorProfilerAlreadyStarted = 7, hipErrorProfilerAlreadyStopped = 8, hipErrorInvalidConfiguration = 9, hipErrorInvalidPitchValue = 12, hipErrorInvalidSymbol = 13, hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction hipErrorInsufficientDriver = 35, hipErrorMissingConfiguration = 52, hipErrorPriorLaunchFailure = 53, hipErrorInvalidDeviceFunction = 98, hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices. hipErrorInvalidImage = 200, hipErrorInvalidContext = 201, ///< Produced when input context is invalid. hipErrorContextAlreadyCurrent = 202, hipErrorMapFailed = 205, // Deprecated hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr. hipErrorUnmapFailed = 206, hipErrorArrayIsMapped = 207, hipErrorAlreadyMapped = 208, hipErrorNoBinaryForGpu = 209, hipErrorAlreadyAcquired = 210, hipErrorNotMapped = 211, hipErrorNotMappedAsArray = 212, hipErrorNotMappedAsPointer = 213, hipErrorECCNotCorrectable = 214, hipErrorUnsupportedLimit = 215, hipErrorContextAlreadyInUse = 216, hipErrorPeerAccessUnsupported = 217, hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX hipErrorInvalidGraphicsContext = 219, hipErrorInvalidSource = 300, hipErrorFileNotFound = 301, hipErrorSharedObjectSymbolNotFound = 302, hipErrorSharedObjectInitFailed = 303, hipErrorOperatingSystem = 304, hipErrorInvalidHandle = 400, // Deprecated hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid. hipErrorNotFound = 500, hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not ///< ready. This is not actually an error, but is used to distinguish ///< from hipSuccess (which indicates completion). APIs that return ///< this error include hipEventQuery and hipStreamQuery. hipErrorIllegalAddress = 700, hipErrorLaunchOutOfResources = 701, ///< Out of resources error. hipErrorLaunchTimeOut = 702, hipErrorPeerAccessAlreadyEnabled = 704, ///< Peer access was already enabled from the current device. hipErrorPeerAccessNotEnabled = 705, ///< Peer access was never enabled from the current device. hipErrorSetOnActiveProcess = 708, hipErrorContextIsDestroyed = 709, hipErrorAssert = 710, ///< Produced when the kernel calls assert. hipErrorHostMemoryAlreadyRegistered = 712, ///< Produced when trying to lock a page-locked memory. hipErrorHostMemoryNotRegistered = 713, ///< Produced when trying to unlock a non-page-locked memory. hipErrorLaunchFailure = 719, ///< An exception occurred on the device while executing a kernel. hipErrorCooperativeLaunchTooLarge = 720, ///< This error indicates that the number of blocks launched per grid for a kernel ///< that was launched via cooperative launch APIs exceeds the maximum number of ///< allowed blocks for the current device hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream ///< is capturing. hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream ///< has been invalidated due to a previous error. hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of ///< two independent capture sequences. hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream. hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not ///< joined to the primary stream. hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses ///< the capture sequence boundary. Only implicit ///< in-stream ordering dependencies are allowed ///< to cross the boundary hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed ///< implicit dependency on a current capture sequence ///< from hipStreamLegacy. hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last ///< recorded in a capturing stream. hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with ///< the hipStreamCaptureModeRelaxed argument to ///< hipStreamBeginCapture was passed to ///< hipStreamEndCapture in a different thread. hipErrorUnknown = 999, //< Unknown error. // HSA Runtime Error Codes start here. hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen ///< in production systems. hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically ///< not seen in production systems. hipErrorTbd ///< Marker that more error codes are needed. } hipError_t; #undef __HIP_NODISCARD typedef enum hipDeviceAttribute_t { hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block. hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block. hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block. hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid. hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid. hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid. hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in ///< bytes. hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. hipDeviceAttributeWarpSize, ///< Warp size in threads. hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a ///< thread block. This number is shared by all thread ///< blocks simultaneously resident on a ///< multiprocessor. hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 ///< cache. hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per ///< multiprocessor. hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels ///< concurrently. hipDeviceAttributePciBusId, ///< PCI Bus ID. hipDeviceAttributePciDeviceId, ///< PCI Device ID. hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per ///< Multiprocessor. hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. hipDeviceAttributeIntegrated, ///< iGPU hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D images in image elements hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D images in image elements hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimensions height of 3D images in image elements hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimensions depth of 3D images in image elements hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies hipDeviceAttributeTextureAlignment, ///