mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-04 05:42:35 +00:00
Update hipDeviceAttribute_t for ROCm 6.x
Add hipDeviceProp_t and bindings for hipGetDeviceProperties(), which is required to retrieve gcnArchName[]. Store gcnArchName in hc_device_param and use it to select the correct --gpu-architecture value for a specific device when using hiprtc. Include sm_major and sm_minor for CUDA and gcnArchName for HIP in the kernel filename hash. Fill nvrtc_options[] and hiprtc_options[] through a running index so that no unused entries remain, eliminating the use of --restrict as a placeholder and preventing nvrtc from aborting. Add check_file_suffix() and remove_file_suffix() helper functions.
This commit is contained in:
parent
13245b5563
commit
c033873e4b
@ -165,38 +165,43 @@ typedef enum __HIP_NODISCARD hipError_t {
|
||||
|
||||
#undef __HIP_NODISCARD
|
||||
|
||||
|
||||
/**
|
||||
* hipDeviceAttribute_t
|
||||
* hipDeviceAttributeUnused number: 5
|
||||
*/
|
||||
typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeCudaCompatibleBegin = 0,
|
||||
|
||||
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
|
||||
hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes.
|
||||
hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number.
|
||||
hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number.
|
||||
hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space
|
||||
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory
|
||||
hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Device can access host registered memory
|
||||
///< at the same virtual address as the CPU
|
||||
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
|
||||
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
|
||||
hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption.
|
||||
hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption.
|
||||
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
|
||||
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU
|
||||
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
|
||||
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
|
||||
hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel.
|
||||
hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel.
|
||||
///< Deprecated. Use instead asyncEngineCount.
|
||||
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
|
||||
///< the device without migration
|
||||
hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1
|
||||
hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. Link between the device and the host supports native atomic operations
|
||||
hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1
|
||||
hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports native atomic operations
|
||||
hipDeviceAttributeIntegrated, ///< Device is integrated GPU
|
||||
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
|
||||
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
||||
hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported
|
||||
hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeLuid, ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
|
||||
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
|
||||
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor
|
||||
hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor
|
||||
hipDeviceAttributeMaxBlockDimX, ///< Max block size in width.
|
||||
hipDeviceAttributeMaxBlockDimY, ///< Max block size in height.
|
||||
hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth.
|
||||
@ -211,22 +216,22 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface.
|
||||
hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered surface.
|
||||
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture.
|
||||
hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture.
|
||||
hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture.
|
||||
hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture.
|
||||
///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
|
||||
hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture.
|
||||
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D texture.
|
||||
hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed.
|
||||
hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture.
|
||||
hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
||||
hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension hight of 2D texture.
|
||||
hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations performed.
|
||||
hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture.
|
||||
hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
||||
hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture.
|
||||
hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture
|
||||
hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture.
|
||||
hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture.
|
||||
hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture
|
||||
hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
|
||||
hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block
|
||||
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
|
||||
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
|
||||
@ -234,45 +239,47 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
|
||||
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
|
||||
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
|
||||
hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board
|
||||
hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU board
|
||||
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
|
||||
hipDeviceAttributeName, ///< Device name.
|
||||
hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName
|
||||
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
|
||||
///< without calling hipHostRegister on it
|
||||
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
|
||||
hipDeviceAttributePciBusId, ///< PCI Bus ID.
|
||||
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
|
||||
hipDeviceAttributePciDomainID, ///< PCI Domain ID.
|
||||
hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes
|
||||
hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes
|
||||
hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared
|
||||
///< by all thread blocks simultaneously resident on a multiprocessor.
|
||||
hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
|
||||
hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block.
|
||||
hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per block.
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
|
||||
hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in.
|
||||
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor.
|
||||
hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special opt in.
|
||||
hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor.
|
||||
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
|
||||
hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities.
|
||||
hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces
|
||||
hipDeviceAttributeStreamPrioritiesSupported, ///< Whether to support stream priorities.
|
||||
hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces
|
||||
hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver
|
||||
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
|
||||
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
|
||||
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on device.
|
||||
hipDeviceAttributeTotalGlobalMem, ///< Global memory available on devicice.
|
||||
hipDeviceAttributeUnifiedAddressing, ///< Cuda only. An unified address space shared with the host.
|
||||
hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 byte.
|
||||
hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid
|
||||
hipDeviceAttributeWarpSize, ///< Warp size in threads.
|
||||
hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator
|
||||
hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
|
||||
hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via hipHostRegister
|
||||
hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream Ordered Memory Allocator
|
||||
|
||||
hipDeviceAttributeCudaCompatibleEnd = 9999,
|
||||
hipDeviceAttributeAmdSpecificBegin = 10000,
|
||||
|
||||
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*"
|
||||
hipDeviceAttributeArch, ///< Device architecture
|
||||
hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch
|
||||
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor.
|
||||
hipDeviceAttributeGcnArch, ///< Device gcn architecture
|
||||
hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes
|
||||
hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch
|
||||
hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName
|
||||
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
|
||||
@ -298,6 +305,190 @@ typedef enum hipDeviceAttribute_t {
|
||||
// Extended attributes for vendors
|
||||
} hipDeviceAttribute_t;
|
||||
|
||||
/**
|
||||
* hipDeviceArch_t: one-bit flags, each reporting whether the device has a given feature
|
||||
* (atomics, doubles, warp cross-lane operations, synchronization, surfaces, 3D grids, dynamic parallelism).
|
||||
*/
|
||||
typedef struct {
|
||||
// 32-bit Atomics
|
||||
unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
|
||||
unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
|
||||
unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
|
||||
unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
|
||||
unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
|
||||
|
||||
// 64-bit Atomics
|
||||
unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
|
||||
unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
|
||||
|
||||
// Doubles
|
||||
unsigned hasDoubles : 1; ///< Double-precision floating point.
|
||||
|
||||
// Warp cross-lane operations
|
||||
unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
|
||||
unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
|
||||
unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
|
||||
unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
|
||||
|
||||
// Sync
|
||||
unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
|
||||
unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or.
|
||||
|
||||
// Misc
|
||||
unsigned hasSurfaceFuncs : 1; ///< Surface functions.
|
||||
unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
|
||||
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
|
||||
} hipDeviceArch_t;
|
||||
|
||||
typedef struct hipUUID_t { // device UUID container; used as the type of hipDeviceProp_t.uuid
|
||||
char bytes[16]; // the 16 raw bytes of the identifier
|
||||
} hipUUID;
|
||||
|
||||
|
||||
/**
|
||||
* hipDeviceProp_t: device properties record that hipGetDeviceProperties() receives by pointer.
|
||||
* NOTE(review): the field layout presumably has to match the ROCm runtime's own hipDeviceProp_t - confirm against the targeted ROCm headers.
|
||||
*/
|
||||
typedef struct hipDeviceProp_t {
|
||||
char name[256]; ///< Device name.
|
||||
hipUUID uuid; ///< UUID of a device
|
||||
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
|
||||
unsigned int luidDeviceNodeMask; ///< LUID node mask
|
||||
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
||||
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
|
||||
int regsPerBlock; ///< Registers per block.
|
||||
int warpSize; ///< Warp size.
|
||||
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
|
||||
///< pitched memory
|
||||
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
||||
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
||||
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
||||
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
||||
size_t totalConstMem; ///< Size of shared constant memory region on the device
|
||||
///< (in bytes).
|
||||
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||
///< feature caps.
|
||||
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
|
||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||
///< feature caps.
|
||||
size_t textureAlignment; ///< Alignment requirement for textures
|
||||
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to pitched memory
|
||||
int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead
|
||||
int multiProcessorCount; ///< Number of multi-processors (compute units).
|
||||
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
|
||||
int integrated; ///< APU vs dGPU
|
||||
int canMapHostMemory; ///< Check whether HIP can map host memory
|
||||
int computeMode; ///< Compute mode.
|
||||
int maxTexture1D; ///< Maximum number of elements in 1D images
|
||||
int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size
|
||||
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
|
||||
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
|
||||
int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images
|
||||
int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory
|
||||
int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed
|
||||
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
|
||||
///< elements
|
||||
int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims
|
||||
int maxTextureCubemap; ///< Maximum cubemap texture dims
|
||||
int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images
|
||||
int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images
|
||||
int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims
|
||||
int maxSurface1D; ///< Maximum 1D surface size
|
||||
int maxSurface2D[2]; ///< Maximum 2D surface size
|
||||
int maxSurface3D[3]; ///< Maximum 3D surface size
|
||||
int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size
|
||||
int maxSurface2DLayered[3]; ///< Maximum 2D layered surface size
|
||||
int maxSurfaceCubemap; ///< Maximum cubemap surface size
|
||||
int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size
|
||||
size_t surfaceAlignment; ///< Alignment requirement for surface
|
||||
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
||||
int ECCEnabled; ///< Device has ECC support enabled
|
||||
int pciBusID; ///< PCI Bus ID.
|
||||
int pciDeviceID; ///< PCI Device ID.
|
||||
int pciDomainID; ///< PCI Domain ID
|
||||
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
||||
int asyncEngineCount; ///< Number of async engines
|
||||
int unifiedAddressing; ///< Does device and host share unified address space
|
||||
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
||||
int memoryBusWidth; ///< Global memory bus width in bits.
|
||||
int l2CacheSize; ///< L2 cache size.
|
||||
int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes
|
||||
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
|
||||
int streamPrioritiesSupported; ///< Device supports stream priority
|
||||
int globalL1CacheSupported; ///< Indicates globals are cached in L1
|
||||
int localL1CacheSupported; ///< Locals are cached in L1
|
||||
size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor.
|
||||
int regsPerMultiprocessor; ///< registers available per multiprocessor
|
||||
int managedMemory; ///< Device supports allocating managed memory on this system
|
||||
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
|
||||
int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU
|
||||
int hostNativeAtomicSupported; ///< Link between host and device supports native atomics
|
||||
int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
|
||||
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
|
||||
///< without calling hipHostRegister on it
|
||||
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
|
||||
///< the CPU
|
||||
int computePreemptionSupported; ///< Is compute preemption supported on the device
|
||||
int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same
|
||||
///< address as the host
|
||||
int cooperativeLaunch; ///< HIP device supports cooperative launch
|
||||
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
|
||||
///< devices
|
||||
size_t
|
||||
sharedMemPerBlockOptin; ///< Per device max shared mem per block usable by special opt in
|
||||
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
|
||||
///< page tables
|
||||
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
|
||||
///< without migration
|
||||
int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU
|
||||
int accessPolicyMaxWindowSize; ///< Max value of access policy window
|
||||
size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block
|
||||
int hostRegisterSupported; ///< Device supports hipHostRegister
|
||||
int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays
|
||||
int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag
|
||||
///< with hipHostRegister
|
||||
int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support
|
||||
int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
|
||||
int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs
|
||||
unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to
|
||||
///< hipFlushGPUDirectRDMAWritesOptions
|
||||
int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering
|
||||
unsigned int
|
||||
memoryPoolSupportedHandleTypes; ///< Bitmask of handle types support with mempool based IPC
|
||||
int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP
|
||||
///< mipmapped arrays
|
||||
int ipcEventSupported; ///< Device supports IPC events
|
||||
int clusterLaunch; ///< Device supports cluster launch
|
||||
int unifiedFunctionPointers; ///< Indicates device supports unified function pointers
|
||||
int reserved[63]; ///< CUDA Reserved.
|
||||
|
||||
int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA.
|
||||
|
||||
/* HIP Only struct members */
|
||||
char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only.
|
||||
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only.
|
||||
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
|
||||
///< instructions. New for HIP.
|
||||
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
||||
unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||
unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
///< devices with unmatched functions
|
||||
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
///< devices with unmatched grid dimensions
|
||||
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
///< devices with unmatched block dimensions
|
||||
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
///< devices with unmatched shared memories
|
||||
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
|
||||
int asicRevision; ///< Revision of the GPU in this device
|
||||
} hipDeviceProp_t;
|
||||
|
||||
//Flags that can be used with hipStreamCreateWithFlags.
|
||||
/** Default stream creation flags. These are used with hipStreamCreate().*/
|
||||
#define hipStreamDefault 0x00
|
||||
@ -410,6 +601,7 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION) (int *);
|
||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *, unsigned int); // pointer type of the hipStreamCreate member of hc_hip_lib_t
|
||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t); // pointer type of the hipStreamDestroy member of hc_hip_lib_t
|
||||
typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t); // pointer type of the hipStreamSynchronize member of hc_hip_lib_t
|
||||
typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES) (hipDeviceProp_t *, hipDevice_t); // pointer type of the hipGetDeviceProperties member: properties struct pointer, then device
|
||||
|
||||
typedef struct hc_hip_lib
|
||||
{
|
||||
@ -453,6 +645,7 @@ typedef struct hc_hip_lib
|
||||
HIP_HIPSTREAMCREATE hipStreamCreate;
|
||||
HIP_HIPSTREAMDESTROY hipStreamDestroy;
|
||||
HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize;
|
||||
HIP_HIPGETDEVICEPROPERTIES hipGetDeviceProperties;
|
||||
|
||||
} hc_hip_lib_t;
|
||||
|
||||
@ -498,5 +691,6 @@ int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion);
|
||||
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags); // NOTE(review): presumably forwards to hipStreamCreate in hc_hip_lib_t - confirm in the implementation
|
||||
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream); // NOTE(review): presumably forwards to hipStreamDestroy in hc_hip_lib_t - confirm in the implementation
|
||||
int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream); // NOTE(review): presumably forwards to hipStreamSynchronize in hc_hip_lib_t - confirm in the implementation
|
||||
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev); // backend.c treats a return of -1 as failure (device is skipped) and otherwise reads prop->gcnArchName
|
||||
|
||||
#endif // HC_EXT_HIP_H
|
||||
|
@ -114,4 +114,7 @@ bool is_apple_silicon (void);
|
||||
|
||||
char *file_to_buffer (const char *filename); // NOTE(review): presumably returns the file's contents in a caller-owned buffer - confirm in shared.c
|
||||

bool check_file_suffix (const char *file, const char *suffix); // NOTE(review): presumably true when `file` ends with `suffix` - confirm in shared.c
|
||||
bool remove_file_suffix (char *file, const char *suffix); // NOTE(review): presumably strips a trailing `suffix` from `file` in place (non-const argument) - confirm in shared.c
|
||||
|
||||
#endif // HC_SHARED_H
|
||||
|
@ -1235,6 +1235,7 @@ typedef struct hc_device_param
|
||||
|
||||
int sm_major;
|
||||
int sm_minor;
|
||||
char *gcnArchName;
|
||||
u32 kernel_exec_timeout;
|
||||
|
||||
u32 kernel_preferred_wgs_multiple;
|
||||
|
106
src/backend.c
106
src/backend.c
@ -408,7 +408,7 @@ static bool cuda_test_instruction (hashcat_ctx_t *hashcat_ctx, const int sm_majo
|
||||
nvrtc_options[0] = "--restrict";
|
||||
nvrtc_options[1] = "--gpu-architecture";
|
||||
|
||||
hc_asprintf (&nvrtc_options[2], "compute_%d%d", sm_major, sm_minor);
|
||||
hc_asprintf (&nvrtc_options[2], "compute_%d", (device_param->sm_major * 10) + device_param->sm_minor);
|
||||
|
||||
nvrtc_options[3] = NULL;
|
||||
|
||||
@ -5804,6 +5804,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
||||
|
||||
device_param->hip_warp_size = hip_warp_size;
|
||||
|
||||
// gcnArchName
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
|
||||
if (hc_hipGetDeviceProperties (hashcat_ctx, &prop, hip_device) == -1)
|
||||
{
|
||||
device_param->skipped = true;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
device_param->gcnArchName = strdup (prop.gcnArchName);
|
||||
|
||||
// sm_minor, sm_major
|
||||
|
||||
int sm_major = 0;
|
||||
@ -8962,61 +8975,47 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
||||
|
||||
if (hc_nvrtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false;
|
||||
|
||||
char **nvrtc_options = (char **) hccalloc (12 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
|
||||
char **nvrtc_options = (char **) hccalloc (16 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
|
||||
|
||||
int nvrtc_options_idx = 0;
|
||||
|
||||
if (backend_ctx->nvrtc_driver_version >= 12000)
|
||||
{
|
||||
nvrtc_options[0] = "--std=c++14";
|
||||
}
|
||||
else
|
||||
{
|
||||
// some placeholder
|
||||
nvrtc_options[0] = "--restrict";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--std=c++14";
|
||||
}
|
||||
|
||||
nvrtc_options[1] = "--restrict";
|
||||
nvrtc_options[2] = "--device-as-default-execution-space";
|
||||
nvrtc_options[3] = "--gpu-architecture";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--restrict";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--device-as-default-execution-space";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--gpu-architecture";
|
||||
|
||||
hc_asprintf (&nvrtc_options[4], "compute_%d%d", device_param->sm_major, device_param->sm_minor);
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "compute_%d", (device_param->sm_major * 10) + device_param->sm_minor);
|
||||
|
||||
if (backend_ctx->nvrtc_driver_version >= 12010)
|
||||
{
|
||||
nvrtc_options[5] = "--split-compile";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--split-compile";
|
||||
|
||||
hc_asprintf (&nvrtc_options[6], "%d", 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// some placeholder
|
||||
nvrtc_options[5] = "--restrict";
|
||||
nvrtc_options[6] = "--restrict";
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "%d", 0);
|
||||
}
|
||||
|
||||
if (backend_ctx->nvrtc_driver_version >= 12040)
|
||||
{
|
||||
nvrtc_options[7] = "--minimal";
|
||||
}
|
||||
else
|
||||
{
|
||||
// some placeholder
|
||||
nvrtc_options[7] = "--restrict";
|
||||
nvrtc_options[nvrtc_options_idx++] = "--minimal";
|
||||
}
|
||||
|
||||
// untested on windows, but it should work
|
||||
#if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
|
||||
hc_asprintf (&nvrtc_options[8], "-D INCLUDE_PATH=%s", "OpenCL");
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "-D INCLUDE_PATH=%s", "OpenCL");
|
||||
#else
|
||||
hc_asprintf (&nvrtc_options[8], "-D INCLUDE_PATH=%s", folder_config->cpath_real);
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "-D INCLUDE_PATH=%s", folder_config->cpath_real);
|
||||
#endif
|
||||
|
||||
hc_asprintf (&nvrtc_options[9], "-D XM2S(x)=#x");
|
||||
hc_asprintf (&nvrtc_options[10], "-D M2S(x)=XM2S(x)");
|
||||
hc_asprintf (&nvrtc_options[11], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "-D XM2S(x)=#x");
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "-D M2S(x)=XM2S(x)");
|
||||
hc_asprintf (&nvrtc_options[nvrtc_options_idx++], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
|
||||
|
||||
char *nvrtc_options_string = hcstrdup (build_options_buf);
|
||||
|
||||
const int num_options = 12 + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + 12);
|
||||
const int num_options = nvrtc_options_idx + nvrtc_make_options_array_from_string (nvrtc_options_string, nvrtc_options + nvrtc_options_idx);
|
||||
|
||||
const int rc_nvrtcCompileProgram = hc_nvrtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) nvrtc_options);
|
||||
|
||||
@ -9238,42 +9237,27 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
||||
|
||||
if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false;
|
||||
|
||||
char **hiprtc_options = (char **) hccalloc (8 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
|
||||
char **hiprtc_options = (char **) hccalloc (16 + strlen (build_options_buf) + 1, sizeof (char *)); // ...
|
||||
|
||||
//hiprtc_options[0] = "--restrict";
|
||||
//hiprtc_options[1] = "--device-as-default-execution-space";
|
||||
//hiprtc_options[2] = "--gpu-architecture";
|
||||
int hiprtc_options_idx = 0;
|
||||
|
||||
hc_asprintf (&hiprtc_options[0], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
|
||||
|
||||
/* 4.3 linux
|
||||
hiprtc_options[1] = "-I";
|
||||
hiprtc_options[2] = "/opt/rocm/hip/bin/include";
|
||||
hiprtc_options[3] = "-I";
|
||||
hiprtc_options[4] = "/opt/rocm/include";
|
||||
hiprtc_options[5] = "-I";
|
||||
*/
|
||||
|
||||
hiprtc_options[1] = "";
|
||||
hiprtc_options[2] = "";
|
||||
hiprtc_options[3] = "";
|
||||
hiprtc_options[4] = "";
|
||||
hiprtc_options[5] = "";
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-architecture=%s", device_param->gcnArchName);
|
||||
|
||||
// untested but it should work
|
||||
#if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
|
||||
hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s/OpenCL/", folder_config->cwd);
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D INCLUDE_PATH=%s/OpenCL/", folder_config->cwd);
|
||||
// ugly, but required since HIPRTC is changing the current working folder to the temporary compile folder
|
||||
#else
|
||||
hc_asprintf (&hiprtc_options[5], "-D INCLUDE_PATH=%s", folder_config->cpath_real);
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D INCLUDE_PATH=%s", folder_config->cpath_real);
|
||||
#endif
|
||||
|
||||
hc_asprintf (&hiprtc_options[6], "-D XM2S(x)=#x");
|
||||
hc_asprintf (&hiprtc_options[7], "-D M2S(x)=XM2S(x)");
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D XM2S(x)=#x");
|
||||
hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D M2S(x)=XM2S(x)");
|
||||
|
||||
char *hiprtc_options_string = hcstrdup (build_options_buf);
|
||||
|
||||
const int num_options = 8 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 8);
|
||||
const int num_options = hiprtc_options_idx + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + hiprtc_options_idx);
|
||||
|
||||
const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options);
|
||||
|
||||
@ -10674,11 +10658,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 };
|
||||
|
||||
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%u-%u",
|
||||
const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%u-%u-%s-%d-%u-%s-%s-%s-%u-%u",
|
||||
backend_ctx->comptime,
|
||||
backend_ctx->cuda_driver_version,
|
||||
backend_ctx->hip_runtimeVersion,
|
||||
backend_ctx->metal_runtimeVersion,
|
||||
device_param->sm_major,
|
||||
device_param->sm_minor,
|
||||
(device_param->is_hip == true) ? device_param->gcnArchName : "",
|
||||
device_param->is_opencl,
|
||||
device_param->opencl_platform_vendor_id,
|
||||
device_param->device_name,
|
||||
@ -11237,11 +11224,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE;
|
||||
|
||||
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%d-%u-%s-%s-%s-%d-%u-%u-%u-%u-%s",
|
||||
const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%u-%u-%s-%d-%u-%s-%s-%s-%d-%u-%u-%u-%u-%s",
|
||||
backend_ctx->comptime,
|
||||
backend_ctx->cuda_driver_version,
|
||||
backend_ctx->hip_runtimeVersion,
|
||||
backend_ctx->metal_runtimeVersion,
|
||||
device_param->sm_major,
|
||||
device_param->sm_minor,
|
||||
(device_param->is_hip == true) ? device_param->gcnArchName : "",
|
||||
device_param->is_opencl,
|
||||
device_param->opencl_platform_vendor_id,
|
||||
device_param->device_name,
|
||||
|
@ -154,6 +154,7 @@ int hip_init (void *hashcat_ctx)
|
||||
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
|
||||
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
|
||||
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
|
||||
HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1148,3 +1149,30 @@ int hc_hipStreamSynchronize (void *hashcat_ctx, hipStream_t hStream)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Query the properties of a HIP device through the function pointer stored
 * in the backend's HIP_PTR table.
 *
 * @param hashcat_ctx  opaque hashcat context; cast to hashcat_ctx_t to reach backend_ctx->hip
 * @param prop         output structure handed unchanged to hipGetDeviceProperties()
 * @param dev          device handle handed unchanged to hipGetDeviceProperties()
 *
 * @return 0 when the HIP call reports hipSuccess, otherwise -1 after an error
 *         has been logged through event_log_error()
 */
int hc_hipGetDeviceProperties (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev)
{
  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;

  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;

  const hipError_t HIP_err = hip->hipGetDeviceProperties (prop, dev);

  if (HIP_err != hipSuccess)
  {
    const char *pStr = NULL;

    // The log prefix names the call that actually failed; it previously
    // said hipDeviceGetAttribute(), which sent readers to the wrong API.

    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
    {
      event_log_error (hashcat_ctx, "hipGetDeviceProperties(): %s", pStr);
    }
    else
    {
      // no readable message available, fall back to the numeric error code

      event_log_error (hashcat_ctx, "hipGetDeviceProperties(): %d", HIP_err);
    }

    return -1;
  }

  return 0;
}
|
||||
|
29
src/shared.c
29
src/shared.c
@ -1553,3 +1553,32 @@ int extract_dynamicx_hash (const u8 *input_buf, const int input_len, u8 **output
|
||||
|
||||
return hash_mode;
|
||||
}
|
||||
|
||||
/**
 * Test whether the string `file` ends with the string `suffix`.
 *
 * The comparison is an exact, case-sensitive byte comparison of the last
 * strlen (suffix) characters of `file`.
 *
 * @param file    string to inspect; NULL yields false
 * @param suffix  expected trailing string; NULL yields false
 *
 * @return true if `file` ends with `suffix`, false otherwise
 */
bool check_file_suffix (const char *file, const char *suffix)
{
  if ((file == NULL) || (suffix == NULL)) return false;

  const size_t file_sz   = strlen (file);
  const size_t suffix_sz = strlen (suffix);

  // a suffix longer than the whole string can never match

  if (file_sz < suffix_sz) return false;

  // compare only the tail of `file` that has the same length as `suffix`

  const char *tail = file + (file_sz - suffix_sz);

  if (strcmp (tail, suffix) != 0) return false;

  return true;
}
|
||||
|
||||
/**
 * Cut `suffix` off the end of `file`, modifying `file` in place.
 *
 * @param file    writable string to shorten; NULL yields false
 * @param suffix  trailing string to remove; NULL yields false
 *
 * @return true if `file` ended with `suffix` and was truncated,
 *         false if either argument is NULL or check_file_suffix() rejected the pair
 */
bool remove_file_suffix (char *file, const char *suffix)
{
  if ((file == NULL) || (suffix == NULL)) return false;

  if (check_file_suffix (file, suffix) == false) return false;

  // terminate the string where the suffix begins

  char *cut = file + (strlen (file) - strlen (suffix));

  *cut = 0;

  return true;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user