// hashcat/OpenCL/inc_platform.cl
// Mirror of https://github.com/hashcat/hashcat.git
//
// Last change: Jens Steube, commit e8052a004b, 2025-06-02
// - Replace naive 32-bit rotate with funnelshift on CUDA/HIP
// - Replace V_ALIGNBIT_B32 with funnelshift on HIP
// - Improve RC4 performance by preventing inlining
// - Fix leftover code in yescrypt-platform.c
// - Update docs/hashcat-assimilation-bridge-development.md
// - Only initialize hwmon on host for virtualized backends
// - Improve SCRYPT tunings on AMD RX6900XT

/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
#ifdef IS_NATIVE
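// On the native (CPU) backend these kernel attributes have no effect, so
// both macros expand to nothing.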
#define FIXED_THREAD_COUNT(n)
#define SYNC_THREADS()
#endif
#ifdef IS_AMD
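// Left rotates delegate to the right-rotate implementations below, using
// the identity rotl (a, n) == rotr (a, 64 - n).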
DECLSPEC u64x rotl64 (const u64x a, const int n)
{
return rotr64 (a, 64 - n);
}
DECLSPEC u64x rotr64 (const u64x a, const int n)
{
#if VECT_SIZE == 1
return rotr64_S (a, n);
#else
return ((a >> n) | ((a << (64 - n))));
#endif
}
DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
return rotr64_S (a, 64 - n);
}
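// A 64-bit right rotate decomposed into two 32-bit amd_bitalign operations:
// amd_bitalign (hi, lo, n) extracts 32 bits starting at bit n from the
// concatenation hi:lo, so swapping the halves and reducing n by 32 covers
// the full 0..63 shift range.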
DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
vconv64_t in;
in.v64 = a;
const u32 a0 = in.v32.a;
const u32 a1 = in.v32.b;
vconv64_t out;
if (n < 32)
{
out.v32.a = amd_bitalign (a1, a0, n);
out.v32.b = amd_bitalign (a0, a1, n);
}
else
{
out.v32.a = amd_bitalign (a0, a1, n - 32);
out.v32.b = amd_bitalign (a1, a0, n - 32);
}
return out.v64;
}
#endif // IS_AMD
#if defined IS_CUDA
#if ATTACK_EXEC == 11
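// A single 32 KiB constant buffer (8192 x u32), reinterpreted per attack
// mode: it carries the rules for ATTACK_KERN 0 and the bf/words data for
// ATTACK_KERN 3, so only one constant symbol has to be managed by the host.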
CONSTANT_VK u32 generic_constant[8192]; // 32k
#if ATTACK_KERN == 0
#define bfs_buf g_bfs_buf
#define rules_buf ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf g_bfs_buf
#define rules_buf g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf g_rules_buf
#define bfs_buf ((const bf_t *) generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *) generic_constant)
#endif // ATTACK_KERN
#endif // ATTACK_EXEC
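// Thin wrappers that give CUDA atomics the OpenCL atomic_* semantics: each
// returns the value stored at *p before the update.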
DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomicSub (p, val);
}
DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomicAdd (p, val);
}
DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
return atomicOr (p, val);
}
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
return (blockIdx.x * blockDim.x) + threadIdx.x;
}
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
{
return threadIdx.x;
}
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
{
// NOTE: dimindx is ignored; only the x dimension is reported
return blockDim.x;
}
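// __funnelshift_l (lo, hi, n) returns the upper 32 bits of (hi:lo) << n and
// __funnelshift_r (lo, hi, n) the lower 32 bits of (hi:lo) >> n; passing the
// same word for both halves turns them into single-instruction rotates.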
DECLSPEC u32x rotl32 (const u32x a, const int n)
{
#if VECT_SIZE == 1
return __funnelshift_l(a, a, n);
#endif
u32x t;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_l(a.s0, a.s0, n);
t.s1 = __funnelshift_l(a.s1, a.s1, n);
#endif
#if VECT_SIZE >= 4
t.s2 = __funnelshift_l(a.s2, a.s2, n);
t.s3 = __funnelshift_l(a.s3, a.s3, n);
#endif
#if VECT_SIZE >= 8
t.s4 = __funnelshift_l(a.s4, a.s4, n);
t.s5 = __funnelshift_l(a.s5, a.s5, n);
t.s6 = __funnelshift_l(a.s6, a.s6, n);
t.s7 = __funnelshift_l(a.s7, a.s7, n);
#endif
#if VECT_SIZE >= 16
t.s8 = __funnelshift_l(a.s8, a.s8, n);
t.s9 = __funnelshift_l(a.s9, a.s9, n);
t.sa = __funnelshift_l(a.sa, a.sa, n);
t.sb = __funnelshift_l(a.sb, a.sb, n);
t.sc = __funnelshift_l(a.sc, a.sc, n);
t.sd = __funnelshift_l(a.sd, a.sd, n);
t.se = __funnelshift_l(a.se, a.se, n);
t.sf = __funnelshift_l(a.sf, a.sf, n);
#endif
return t;
//return ((a << n) | ((a >> (32 - n))));
}
DECLSPEC u32x rotr32 (const u32x a, const int n)
{
#if VECT_SIZE == 1
return __funnelshift_r(a, a, n);
#endif
u32x t;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_r(a.s0, a.s0, n);
t.s1 = __funnelshift_r(a.s1, a.s1, n);
#endif
#if VECT_SIZE >= 4
t.s2 = __funnelshift_r(a.s2, a.s2, n);
t.s3 = __funnelshift_r(a.s3, a.s3, n);
#endif
#if VECT_SIZE >= 8
t.s4 = __funnelshift_r(a.s4, a.s4, n);
t.s5 = __funnelshift_r(a.s5, a.s5, n);
t.s6 = __funnelshift_r(a.s6, a.s6, n);
t.s7 = __funnelshift_r(a.s7, a.s7, n);
#endif
#if VECT_SIZE >= 16
t.s8 = __funnelshift_r(a.s8, a.s8, n);
t.s9 = __funnelshift_r(a.s9, a.s9, n);
t.sa = __funnelshift_r(a.sa, a.sa, n);
t.sb = __funnelshift_r(a.sb, a.sb, n);
t.sc = __funnelshift_r(a.sc, a.sc, n);
t.sd = __funnelshift_r(a.sd, a.sd, n);
t.se = __funnelshift_r(a.se, a.se, n);
t.sf = __funnelshift_r(a.sf, a.sf, n);
#endif
return t;
//return ((a >> n) | ((a << (32 - n))));
}
DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
return __funnelshift_l(a, a, n);
//return ((a << n) | ((a >> (32 - n))));
}
DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
return __funnelshift_r(a, a, n);
//return ((a >> n) | ((a << (32 - n))));
}
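// CUDA has no 64-bit funnel-shift intrinsic, so the 64-bit rotates keep the
// generic shift/or form and leave the lowering to the compiler.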
DECLSPEC u64x rotl64 (const u64x a, const int n)
{
return ((a << n) | ((a >> (64 - n))));
}
DECLSPEC u64x rotr64 (const u64x a, const int n)
{
return ((a >> n) | ((a << (64 - n))));
}
DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
return ((a << n) | ((a >> (64 - n))));
}
DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
return ((a >> n) | ((a << (64 - n))));
}
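// The second __launch_bounds__ argument (minBlocksPerMultiprocessor) is 0
// here, which effectively leaves that heuristic to the compiler and only
// pins the maximum block size to n.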
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif // IS_CUDA
#if defined IS_HIP
#if ATTACK_EXEC == 11
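// Unlike the CUDA path, the buffer is tagged __attribute__((used)) so the
// HIP compiler keeps the symbol even when no kernel in this module
// references it and the host can still resolve it (e.g. via
// hipModuleGetGlobal).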
CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k
#if ATTACK_KERN == 0
#define bfs_buf g_bfs_buf
#define rules_buf ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf g_bfs_buf
#define rules_buf g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf g_rules_buf
#define bfs_buf ((const bf_t *) generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *) generic_constant)
#endif // ATTACK_KERN
#endif // ATTACK_EXEC
DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomicSub (p, val);
}
DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomicAdd (p, val);
}
DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
return atomicOr (p, val);
}
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
return (blockIdx.x * blockDim.x) + threadIdx.x;
}
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
{
return threadIdx.x;
}
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
{
// NOTE: dimindx is ignored; only the x dimension is reported
return blockDim.x;
}
DECLSPEC u32x rotl32 (const u32x a, const int n)
{
#if VECT_SIZE == 1
return __funnelshift_l(a, a, n);
#endif
u32x t;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_l(a.s0, a.s0, n);
t.s1 = __funnelshift_l(a.s1, a.s1, n);
#endif
#if VECT_SIZE >= 4
t.s2 = __funnelshift_l(a.s2, a.s2, n);
t.s3 = __funnelshift_l(a.s3, a.s3, n);
#endif
#if VECT_SIZE >= 8
t.s4 = __funnelshift_l(a.s4, a.s4, n);
t.s5 = __funnelshift_l(a.s5, a.s5, n);
t.s6 = __funnelshift_l(a.s6, a.s6, n);
t.s7 = __funnelshift_l(a.s7, a.s7, n);
#endif
#if VECT_SIZE >= 16
t.s8 = __funnelshift_l(a.s8, a.s8, n);
t.s9 = __funnelshift_l(a.s9, a.s9, n);
t.sa = __funnelshift_l(a.sa, a.sa, n);
t.sb = __funnelshift_l(a.sb, a.sb, n);
t.sc = __funnelshift_l(a.sc, a.sc, n);
t.sd = __funnelshift_l(a.sd, a.sd, n);
t.se = __funnelshift_l(a.se, a.se, n);
t.sf = __funnelshift_l(a.sf, a.sf, n);
#endif
return t;
//return ((a << n) | ((a >> (32 - n))));
}
DECLSPEC u32x rotr32 (const u32x a, const int n)
{
#if VECT_SIZE == 1
return __funnelshift_r(a, a, n);
#endif
u32x t;
#if VECT_SIZE >= 2
t.s0 = __funnelshift_r(a.s0, a.s0, n);
t.s1 = __funnelshift_r(a.s1, a.s1, n);
#endif
#if VECT_SIZE >= 4
t.s2 = __funnelshift_r(a.s2, a.s2, n);
t.s3 = __funnelshift_r(a.s3, a.s3, n);
#endif
#if VECT_SIZE >= 8
t.s4 = __funnelshift_r(a.s4, a.s4, n);
t.s5 = __funnelshift_r(a.s5, a.s5, n);
t.s6 = __funnelshift_r(a.s6, a.s6, n);
t.s7 = __funnelshift_r(a.s7, a.s7, n);
#endif
#if VECT_SIZE >= 16
t.s8 = __funnelshift_r(a.s8, a.s8, n);
t.s9 = __funnelshift_r(a.s9, a.s9, n);
t.sa = __funnelshift_r(a.sa, a.sa, n);
t.sb = __funnelshift_r(a.sb, a.sb, n);
t.sc = __funnelshift_r(a.sc, a.sc, n);
t.sd = __funnelshift_r(a.sd, a.sd, n);
t.se = __funnelshift_r(a.se, a.se, n);
t.sf = __funnelshift_r(a.sf, a.sf, n);
#endif
return t;
//return ((a >> n) | ((a << (32 - n))));
}
DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
return __funnelshift_l(a, a, n);
//return ((a << n) | ((a >> (32 - n))));
}
DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
return __funnelshift_r(a, a, n);
//return ((a >> n) | ((a << (32 - n))));
}
DECLSPEC u64x rotl64 (const u64x a, const int n)
{
#if VECT_SIZE == 1
return rotl64_S (a, n);
#else
return ((a << n) | ((a >> (64 - n))));
#endif
}
DECLSPEC u64x rotr64 (const u64x a, const int n)
{
#if VECT_SIZE == 1
return rotr64_S (a, n);
#else
return ((a >> n) | ((a << (64 - n))));
#endif
}
DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
return rotr64_S (a, 64 - n);
}
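// The same half-swapping scheme as the AMD amd_bitalign variant above,
// expressed with 32-bit funnel shifts.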
DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
vconv64_t in;
in.v64 = a;
const u32 a0 = in.v32.a;
const u32 a1 = in.v32.b;
vconv64_t out;
if (n < 32)
{
out.v32.a = __funnelshift_r (a0, a1, n);
out.v32.b = __funnelshift_r (a1, a0, n);
}
else
{
out.v32.a = __funnelshift_r (a1, a0, n - 32);
out.v32.b = __funnelshift_r (a0, a1, n - 32);
}
return out.v64;
}
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif // IS_HIP
#ifdef IS_METAL
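// Metal only exposes atomics through the explicit atomic_* types, so the
// plain u32 pointers are reinterpreted as atomic_int; relaxed ordering
// matches the implicit ordering of the OpenCL atomic_* built-ins used below.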
DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;
return atomic_fetch_sub_explicit (pd, val, memory_order_relaxed);
}
DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;
return atomic_fetch_add_explicit (pd, val, memory_order_relaxed);
}
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
{
volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;
return atomic_fetch_or_explicit (pd, val, memory_order_relaxed);
}
#define FIXED_THREAD_COUNT(n)
#define SYNC_THREADS() threadgroup_barrier (mem_flags::mem_threadgroup)
#endif // IS_METAL
#ifdef IS_OPENCL
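// On OpenCL the wrappers map directly onto the 32-bit atomic built-ins, and
// FIXED_THREAD_COUNT becomes a reqd_work_group_size hint so the compiler can
// specialize for the fixed work-group size.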
DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomic_sub (p, val);
}
DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return atomic_add (p, val);
}
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
{
return atomic_or (p, val);
}
#define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1)))
#define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE)
#endif // IS_OPENCL