Mirror of https://github.com/hashcat/hashcat.git (synced 2025-07-19 13:08:19 +00:00)

Latest commit messages:

- HIP has no funnelshift emulation, so on AMD it is better to use the native __builtin_amdgcn_alignbit(). For NVIDIA we need to make sure the target matches a supported SM version and fall back to byte_perm() otherwise.
- Fix hash-mode 6900 in optimized mode and attack mode 3: this hash-mode doesn't use any stopbit, and if the password length is exactly 16 or 32, hashcat selects the next higher kernel, e.g. s16 for password length 32. For such corner cases we must copy the s08 code to s16. No other algorithms appear to be affected; some have the s16 body left empty, but those have a password length limit < 32.
- Add test_edge* to .gitignore
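As a side note on the byte_perm() fallback mentioned above, here is a minimal standalone CUDA sketch (not taken from the hashcat tree; the helper names are invented for illustration). __byte_perm() is the stock CUDA intrinsic; a byte permute can only replace rotations whose shift count is a multiple of 8, so arbitrary shift counts still need a generic shift/or rotate like the non-funnelshift paths later in this file.

// Illustrative only: rotate right by whole bytes via __byte_perm ().
// __byte_perm (x, y, s) builds the 8-byte value {y:x} and picks one source
// byte per selector nibble; with x == y this is a byte-wise rotate of a.
__device__ unsigned int rotr32_by8_sketch  (const unsigned int a) { return __byte_perm (a, a, 0x0321); } // (a >>  8) | (a << 24)
__device__ unsigned int rotr32_by16_sketch (const unsigned int a) { return __byte_perm (a, a, 0x1032); } // (a >> 16) | (a << 16)
__device__ unsigned int rotr32_by24_sketch (const unsigned int a) { return __byte_perm (a, a, 0x2103); } // (a >> 24) | (a <<  8)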
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
#include "inc_common.h"

#ifdef IS_NATIVE
#define FIXED_THREAD_COUNT(n)
#define SYNC_THREADS()
#endif

#ifdef IS_AMD

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  return rotr64 (a, 64 - n);
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotr64_S (a, n);
  #else
  return ((a >> n) | ((a << (64 - n))));
  #endif
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return rotr64_S (a, 64 - n);
}

DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  vconv64_t in;

  in.v64 = a;

  const u32 a0 = in.v32.a;
  const u32 a1 = in.v32.b;

  vconv64_t out;

  if (n < 32)
  {
    out.v32.a = amd_bitalign (a1, a0, n);
    out.v32.b = amd_bitalign (a0, a1, n);
  }
  else
  {
    out.v32.a = amd_bitalign (a0, a1, n - 32);
    out.v32.b = amd_bitalign (a1, a0, n - 32);
  }

  return out.v64;
}
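
// Explanatory note (added here, not from upstream): amd_bitalign (hi, lo, s)
// returns the low 32 bits of (((u64) hi << 32) | lo) >> s for 0 <= s <= 31,
// so the 64-bit rotate above splits into two 32-bit extractions.
// Worked example for n = 8, a = 0x1122334455667788 (a0 = 0x55667788, a1 = 0x11223344):
//   out.v32.a = amd_bitalign (a1, a0, 8) = 0x44556677
//   out.v32.b = amd_bitalign (a0, a1, 8) = 0x88112233
//   out.v64   = 0x8811223344556677      = rotr64_S (a, 8)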

#endif // IS_AMD

// this applies to both the CUDA and the OpenCL backend (IS_NV is set for NVIDIA devices on either)
#if defined IS_NV

#ifdef USE_FUNNELSHIFT

DECLSPEC u32 hc_funnelshift_l (const u32 lo, const u32 hi, const int shift)
{
  u32 result;

  asm volatile ("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(lo), "r"(hi), "r"(shift));

  return result;
}

DECLSPEC u32 hc_funnelshift_r (const u32 lo, const u32 hi, const int shift)
{
  u32 result;

  asm volatile ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(lo), "r"(hi), "r"(shift));

  return result;
}
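
// Note (added here, not from upstream): passing the same word twice turns the
// funnel shift into a plain rotate:
//   hc_funnelshift_l (a, a, n) -> rotate left  by n
//   hc_funnelshift_r (a, a, n) -> rotate right by n
// which is exactly how rotl32_S () / rotr32_S () below use these wrappers
// when USE_FUNNELSHIFT is defined.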

#endif

#endif // IS_NV

#if defined IS_CUDA

#if ATTACK_EXEC == 11

CONSTANT_VK u32 generic_constant[8192]; // 32k

#if   ATTACK_KERN == 0
#define bfs_buf     g_bfs_buf
#define rules_buf   ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf     g_bfs_buf
#define rules_buf   g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf   g_rules_buf
#define bfs_buf     ((const bf_t *)      generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *)      generic_constant)
#endif // ATTACK_KERN

#endif // ATTACK_EXEC

DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicSub (p, val);
}

DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicAdd (p, val);
}

DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
  return atomicOr (p, val);
}

DECLSPEC size_t get_group_id (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return blockIdx.x;
    case 1:
      return blockIdx.y;
    case 2:
      return blockIdx.z;
  }

  return (size_t) -1;
}

DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
  return (blockIdx.x * blockDim.x) + threadIdx.x;
}

DECLSPEC size_t get_local_id (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return threadIdx.x;
    case 1:
      return threadIdx.y;
    case 2:
      return threadIdx.z;
  }

  return (size_t) -1;
}

DECLSPEC size_t get_local_size (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return blockDim.x;
    case 1:
      return blockDim.y;
    case 2:
      return blockDim.z;
  }

  return (size_t) -1;
}

DECLSPEC u32x rotl32 (const u32x a, const int n)
{
  #if VECT_SIZE == 1

  return rotl32_S (a, n);

  #else

  u32x t = 0;

  #if VECT_SIZE >= 2
  t.s0 = rotl32_S (a.s0, n);
  t.s1 = rotl32_S (a.s1, n);
  #endif

  #if VECT_SIZE >= 4
  t.s2 = rotl32_S (a.s2, n);
  t.s3 = rotl32_S (a.s3, n);
  #endif

  #if VECT_SIZE >= 8
  t.s4 = rotl32_S (a.s4, n);
  t.s5 = rotl32_S (a.s5, n);
  t.s6 = rotl32_S (a.s6, n);
  t.s7 = rotl32_S (a.s7, n);
  #endif

  #if VECT_SIZE >= 16
  t.s8 = rotl32_S (a.s8, n);
  t.s9 = rotl32_S (a.s9, n);
  t.sa = rotl32_S (a.sa, n);
  t.sb = rotl32_S (a.sb, n);
  t.sc = rotl32_S (a.sc, n);
  t.sd = rotl32_S (a.sd, n);
  t.se = rotl32_S (a.se, n);
  t.sf = rotl32_S (a.sf, n);
  #endif

  return t;

  #endif
}

DECLSPEC u32x rotr32 (const u32x a, const int n)
{
  #if VECT_SIZE == 1

  return rotr32_S (a, n);

  #else

  u32x t = 0;

  #if VECT_SIZE >= 2
  t.s0 = rotr32_S (a.s0, n);
  t.s1 = rotr32_S (a.s1, n);
  #endif

  #if VECT_SIZE >= 4
  t.s2 = rotr32_S (a.s2, n);
  t.s3 = rotr32_S (a.s3, n);
  #endif

  #if VECT_SIZE >= 8
  t.s4 = rotr32_S (a.s4, n);
  t.s5 = rotr32_S (a.s5, n);
  t.s6 = rotr32_S (a.s6, n);
  t.s7 = rotr32_S (a.s7, n);
  #endif

  #if VECT_SIZE >= 16
  t.s8 = rotr32_S (a.s8, n);
  t.s9 = rotr32_S (a.s9, n);
  t.sa = rotr32_S (a.sa, n);
  t.sb = rotr32_S (a.sb, n);
  t.sc = rotr32_S (a.sc, n);
  t.sd = rotr32_S (a.sd, n);
  t.se = rotr32_S (a.se, n);
  t.sf = rotr32_S (a.sf, n);
  #endif

  return t;

  #endif
}

DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
  #ifdef USE_FUNNELSHIFT
  return hc_funnelshift_l (a, a, n);
  #else
  return ((a << n) | ((a >> (32 - n))));
  #endif
}

DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
  #ifdef USE_FUNNELSHIFT
  return hc_funnelshift_r (a, a, n);
  #else
  return ((a >> n) | ((a << (32 - n))));
  #endif
}

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotl64_S (a, n);
  #else
  return ((a << n) | ((a >> (64 - n))));
  #endif
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotr64_S (a, n);
  #else
  return ((a >> n) | ((a << (64 - n))));
  #endif
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return rotr64_S (a, 64 - n);
}

DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  return ((a >> n) | ((a << (64 - n))));
}

#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif // IS_CUDA

#if defined IS_HIP

#if ATTACK_EXEC == 11

CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k

#if   ATTACK_KERN == 0
#define bfs_buf     g_bfs_buf
#define rules_buf   ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf     g_bfs_buf
#define rules_buf   g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf   g_rules_buf
#define bfs_buf     ((const bf_t *)      generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *)      generic_constant)
#endif // ATTACK_KERN

#endif // ATTACK_EXEC

DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicSub (p, val);
}

DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomicAdd (p, val);
}

DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
  return atomicOr (p, val);
}

DECLSPEC size_t get_group_id (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return blockIdx.x;
    case 1:
      return blockIdx.y;
    case 2:
      return blockIdx.z;
  }

  return (size_t) -1;
}

DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
  return (blockIdx.x * blockDim.x) + threadIdx.x;
}

DECLSPEC size_t get_local_id (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return threadIdx.x;
    case 1:
      return threadIdx.y;
    case 2:
      return threadIdx.z;
  }

  return (size_t) -1;
}

DECLSPEC size_t get_local_size (const u32 dimindx)
{
  switch (dimindx)
  {
    case 0:
      return blockDim.x;
    case 1:
      return blockDim.y;
    case 2:
      return blockDim.z;
  }

  return (size_t) -1;
}

DECLSPEC u32x rotl32 (const u32x a, const int n)
{
  #if VECT_SIZE == 1

  return rotl32_S (a, n);

  #else

  u32x t = 0;

  #if VECT_SIZE >= 2
  t.s0 = rotl32_S (a.s0, n);
  t.s1 = rotl32_S (a.s1, n);
  #endif

  #if VECT_SIZE >= 4
  t.s2 = rotl32_S (a.s2, n);
  t.s3 = rotl32_S (a.s3, n);
  #endif

  #if VECT_SIZE >= 8
  t.s4 = rotl32_S (a.s4, n);
  t.s5 = rotl32_S (a.s5, n);
  t.s6 = rotl32_S (a.s6, n);
  t.s7 = rotl32_S (a.s7, n);
  #endif

  #if VECT_SIZE >= 16
  t.s8 = rotl32_S (a.s8, n);
  t.s9 = rotl32_S (a.s9, n);
  t.sa = rotl32_S (a.sa, n);
  t.sb = rotl32_S (a.sb, n);
  t.sc = rotl32_S (a.sc, n);
  t.sd = rotl32_S (a.sd, n);
  t.se = rotl32_S (a.se, n);
  t.sf = rotl32_S (a.sf, n);
  #endif

  return t;

  #endif
}

DECLSPEC u32x rotr32 (const u32x a, const int n)
{
  #if VECT_SIZE == 1

  return rotr32_S (a, n);

  #else

  u32x t = 0;

  #if VECT_SIZE >= 2
  t.s0 = rotr32_S (a.s0, n);
  t.s1 = rotr32_S (a.s1, n);
  #endif

  #if VECT_SIZE >= 4
  t.s2 = rotr32_S (a.s2, n);
  t.s3 = rotr32_S (a.s3, n);
  #endif

  #if VECT_SIZE >= 8
  t.s4 = rotr32_S (a.s4, n);
  t.s5 = rotr32_S (a.s5, n);
  t.s6 = rotr32_S (a.s6, n);
  t.s7 = rotr32_S (a.s7, n);
  #endif

  #if VECT_SIZE >= 16
  t.s8 = rotr32_S (a.s8, n);
  t.s9 = rotr32_S (a.s9, n);
  t.sa = rotr32_S (a.sa, n);
  t.sb = rotr32_S (a.sb, n);
  t.sc = rotr32_S (a.sc, n);
  t.sd = rotr32_S (a.sd, n);
  t.se = rotr32_S (a.se, n);
  t.sf = rotr32_S (a.sf, n);
  #endif

  return t;

  #endif
}

DECLSPEC u32 rotl32_S (const u32 a, const int n)
{
  return rotr32_S (a, 32 - n);
}

DECLSPEC u32 rotr32_S (const u32 a, const int n)
{
  return __builtin_amdgcn_alignbit (a, a, n);
}

DECLSPEC u64x rotl64 (const u64x a, const int n)
{
  return rotr64 (a, 64 - n);
}

DECLSPEC u64x rotr64 (const u64x a, const int n)
{
  #if VECT_SIZE == 1
  return rotr64_S (a, n);
  #else
  return ((a >> n) | ((a << (64 - n))));
  #endif
}

DECLSPEC u64 rotl64_S (const u64 a, const int n)
{
  return rotr64_S (a, 64 - n);
}

DECLSPEC u64 rotr64_S (const u64 a, const int n)
{
  vconv64_t in;

  in.v64 = a;

  const u32 a0 = in.v32.a;
  const u32 a1 = in.v32.b;

  vconv64_t out;

  const int n31 = n & 31;

  if (n < 32)
  {
    out.v32.a = __builtin_amdgcn_alignbit (a1, a0, n31);
    out.v32.b = __builtin_amdgcn_alignbit (a0, a1, n31);
  }
  else
  {
    out.v32.a = __builtin_amdgcn_alignbit (a0, a1, n31);
    out.v32.b = __builtin_amdgcn_alignbit (a1, a0, n31);
  }

  return out.v64;
}
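
// Explanatory note (added here, not from upstream): n31 = n & 31 equals n for
// n < 32 and n - 32 for 32 <= n < 64, so the same masked shift amount serves
// both branches above; only the order of the high/low input words changes.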

#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif // IS_HIP

#ifdef IS_METAL

DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;
  volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;

  return atomic_fetch_sub_explicit (pd, val, memory_order_relaxed);
}

DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;
  volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;

  return atomic_fetch_add_explicit (pd, val, memory_order_relaxed);
}

DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
{
  volatile GLOBAL_AS atomic_int *pd = (volatile GLOBAL_AS atomic_int *) p;

  return atomic_fetch_or_explicit (pd, val, memory_order_relaxed);
}

#define FIXED_THREAD_COUNT(n)
#define SYNC_THREADS() threadgroup_barrier (mem_flags::mem_threadgroup)
#endif // IS_METAL

#ifdef IS_OPENCL

DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomic_sub (p, val);
}

DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p)
{
  volatile const u32 val = 1;

  return atomic_add (p, val);
}

DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val)
{
  return atomic_or (p, val);
}

#define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1)))
#define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE)
#endif // IS_OPENCL