1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-19 13:08:19 +00:00
hashcat/OpenCL/inc_vendor.h
Jens Steube 06344910a4 Refactored HIP kernel code for improved performance and cleanup
- Replaced inline asm in hc_byte_perm() with __builtin_amdgcn_perm()
- Replaced inline asm in hc_bytealign() with __builtin_amdgcn_alignbyte()
- Defined HC_INLINE as default for HIP, significantly boosting kernel performance of pure kernels
- Removed IS_ROCM from inc_vendor.h as it's no longer needed
- Removed backend-specific code from several hash-modes and inc_rp_optimized.cl, as hc_bytealign_S() is now available on all backends
2025-07-10 13:31:00 +02:00

223 lines
4.1 KiB
C++

/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#ifndef INC_VENDOR_H
#define INC_VENDOR_H
#if defined HC_CPU_OPENCL_EMU_H
#define IS_NATIVE
#elif defined __CUDACC__
#define IS_CUDA
#elif defined __HIPCC__
#define IS_HIP
#elif defined __METAL__ || defined __METAL_MACOS__
#define IS_METAL
#else
#define IS_OPENCL
#endif
#if defined IS_METAL
#include <metal_stdlib>
using namespace metal;
#endif
#if defined IS_NATIVE
#define CONSTANT_VK
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK
#define LOCAL_AS
#define PRIVATE_AS
#define KERNEL_FQ
#elif defined IS_CUDA
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK __shared__
#define LOCAL_AS
#define PRIVATE_AS
#define KERNEL_FQ extern "C" __global__
#elif defined IS_HIP
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK __shared__
#define LOCAL_AS
#define PRIVATE_AS
#define KERNEL_FQ extern "C" __global__
#elif defined IS_METAL
#define CONSTANT_VK constant
#define CONSTANT_AS constant
#define GLOBAL_AS device
#define LOCAL_VK threadgroup
#define LOCAL_AS threadgroup
#define PRIVATE_AS thread
#define KERNEL_FQ kernel
#elif defined IS_OPENCL
#define CONSTANT_VK __constant
#define CONSTANT_AS __constant
#define GLOBAL_AS __global
#define LOCAL_VK __local
#define LOCAL_AS __local
#define PRIVATE_AS
#define KERNEL_FQ __kernel
#endif
#if defined FIXED_LOCAL_SIZE
#define KERNEL_FA FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE)
#else
#define KERNEL_FA
#endif
#ifndef MAYBE_UNUSED
#define MAYBE_UNUSED
#endif
/**
* device type
*/
#define DEVICE_TYPE_CPU 2
#define DEVICE_TYPE_GPU 4
#define DEVICE_TYPE_ACCEL 8
#if DEVICE_TYPE == DEVICE_TYPE_CPU
#define IS_CPU
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
#define IS_GPU
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#define IS_ACCEL
#endif
/**
* vendor specific
*/
#if VENDOR_ID == (1 << 0)
#define IS_AMD
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
#elif VENDOR_ID == (1 << 2)
#define IS_INTEL_BEIGNET
#define IS_GENERIC
#elif VENDOR_ID == (1 << 3)
#define IS_INTEL_SDK
#define IS_GENERIC
#elif VENDOR_ID == (1 << 4)
#define IS_MESA
#define IS_GENERIC
#elif VENDOR_ID == (1 << 5)
#define IS_NV
#elif VENDOR_ID == (1 << 6)
#define IS_POCL
#define IS_GENERIC
#elif VENDOR_ID == (1 << 8)
#define IS_AMD_USE_HIP
#else
#define IS_GENERIC
#endif
#define LOCAL_MEM_TYPE_LOCAL 1
#define LOCAL_MEM_TYPE_GLOBAL 2
#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL
#define REAL_SHM
#endif
// So far, only used by -m 22100 and only affects NVIDIA on OpenCL. CUDA seems to work fine.
#ifdef FORCE_DISABLE_SHM
#undef REAL_SHM
#endif
#ifdef REAL_SHM
#define SHM_TYPE LOCAL_AS
#else
#define SHM_TYPE CONSTANT_AS
#endif
/**
* function declarations can have a large influence depending on the opencl runtime
* fast but pure kernels on rocm is a good example
*/
#ifdef NO_INLINE
#define HC_INLINE
#else
#define HC_INLINE inline static
#endif
#if defined IS_AMD && defined IS_GPU
#define DECLSPEC HC_INLINE
#elif defined IS_CUDA
#define DECLSPEC __device__
#elif defined IS_HIP
#define DECLSPEC __device__ HC_INLINE
#else
#define DECLSPEC
#endif
/**
* AMD specific
*/
#ifdef IS_AMD
#if defined(cl_amd_media_ops)
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#endif
#if defined(cl_amd_media_ops2)
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#endif
#endif
// Whitelist some OpenCL specific functions
// This could create more stable kernels on systems with bad OpenCL drivers
#ifdef IS_CUDA
#define USE_BITSELECT
#define USE_ROTATE
#endif
#ifdef IS_HIP
#define USE_BITSELECT
#define USE_ROTATE
#endif
#ifdef IS_INTEL_SDK
#ifdef IS_CPU
//#define USE_BITSELECT
//#define USE_ROTATE
#endif
#endif
#ifdef IS_OPENCL
//#define USE_BITSELECT
//#define USE_ROTATE
//#define USE_SWIZZLE
#endif
#ifdef IS_METAL
#define USE_ROTATE
// Metal support max VECT_SIZE = 4
#define s0 x
#define s1 y
#define s2 z
#define s3 w
#endif
#if HAS_SHFW == 1
#define USE_FUNNELSHIFT
#endif
// some algorithms do not like this, eg 150, 1100, ...
#ifdef NO_FUNNELSHIFT
#undef USE_FUNNELSHIFT
#endif
#endif // INC_VENDOR_H